描述
Typha:在節點數比較多的情況下,Felix 可通過 Typha 直接和 Etcd 進行數據交互,不通過 kube-apiserver,從而降低其壓力。(有待驗證)
引入網上他人圖片,嗯,畫得非常好
- BGPClient(BIRD):把 Felix寫入 kernel的路由信息分發到當前 Calico網絡,確保 workload 間的通信
- BGPRoute Reflector(BIRD):大規模部署時使用,通過一個或者多個 BGPRoute Reflector 來完成集中式的路由分發;後端中有新的規則加入時,Route Reflector 就會將新的記錄同步
1. TyphaDaemon New 函數
NewClientV3 建立客戶端連接,看着像 etcd,接着看看怎麼使用的,github.com/projectcalico/libcalico-go/lib/backend/client.go 中定義了 backend.NewClient,可以使用 etcd 與 kubernetes
ConfigureEarlyLogging 配置日誌
// New builds a TyphaDaemon whose hooks are wired to the production implementations.
//
// NewClientV3 creates the datastore client with clientv3.New and wraps it, together
// with the config it was built from, in a ClientV3Shim; a construction error is
// returned unchanged. The logging hooks point at the logutils helpers and
// CachesBySyncerType starts out as an empty map.
func New() *TyphaDaemon {
	newDatastoreClient := func(config apiconfig.CalicoAPIConfig) (DatastoreClient, error) {
		c, err := clientv3.New(config)
		if err != nil {
			return nil, err
		}
		// Unchecked type assertion: this panics if the assertion fails.
		shim := ClientV3Shim{c.(RealClientV3), config}
		return shim, nil
	}

	daemon := &TyphaDaemon{
		NewClientV3:           newDatastoreClient,
		ConfigureEarlyLogging: logutils.ConfigureEarlyLogging,
		ConfigureLogging:      logutils.ConfigureLogging,
		CachesBySyncerType:    map[syncproto.SyncerType]syncserver.BreadcrumbProvider{},
	}
	return daemon
}
2. InitializeAndServeForever 函數
DoEarlyRuntimeSetup 主要是前期設置日誌格式以及日誌級別,主要用於配置的日誌記錄
ParseCommandLineArgs 這個分析命令行參數,--config-file:/etc/calico/typha.cfg
[global] MetadataAddr = None LogFilePath = None LogSeverityFile = None
LoadConfiguration 這個函數內容挺多,pkg/config/config_params.go 中 結構體 Config,初始化使用了默認值,使用反射機制,字段非常多,如下一部分,先從環境變量中 load 前綴爲 typha 的 key,再從配置文件 /etc/calico/typha.cfg 中讀取
DatastoreType string `config:"oneof(kubernetes,etcdv3);etcdv3;non-zero,die-on-fail"` EtcdAddr string `config:"authority;127.0.0.1:2379;local"` EtcdScheme string `config:"oneof(http,https);http;local"` EtcdKeyFile string `config:"file(must-exist);;local"` EtcdCertFile string `config:"file(must-exist);;local"` EtcdCaFile string `config:"file(must-exist);;local"` EtcdEndpoints []string `config:"endpoint-list;;local"`
// InitializeAndServeForever runs the daemon's lifecycle in order: early runtime
// setup, command-line parsing, configuration loading, server creation, start-up
// and finally waiting for shutdown.
//
// The only error it returns is the one from LoadConfiguration; otherwise it
// returns nil once WaitAndShutDown comes back.
func (t *TyphaDaemon) InitializeAndServeForever(cxt context.Context) error {
	t.DoEarlyRuntimeSetup()
	t.ParseCommandLineArgs(nil)

	// Should only fail if the context is canceled.
	if err := t.LoadConfiguration(cxt); err != nil {
		return err
	}

	t.CreateServer()
	t.Start(cxt)
	t.WaitAndShutDown(cxt)
	return nil
}
2.1 CreateServer 函數
創建以及配置服務組件,就是不啓動
FelixSyncerByIface 調用 felixsyncer.New 創建 felix syncer,實現在 github.com/projectcalico/libcalico-go/lib/backend/syncersv1/felixsyncer/felixsyncerv1.go, 定義了一大堆 watchersyncer.ResourceType,實例化 watcherSyncer,實現在 github.com/projectcalico/libcalico-go/lib/backend/watchersyncer/watchersyncer.go 中,具體待後續分析
BGPSyncerByIface 調用 bgpsyncer.New 創建 bgp syncer,實現在 github.com/projectcalico/libcalico-go/lib/backend/syncersv1/bgpsyncer/bgpsyncer.go,同樣實例化 watchersyncer
// CreateServer wires up the server components without starting any of them.
func (t *TyphaDaemon) CreateServer() {
	// The health aggregator is used for the liveness and readiness endpoints.
	t.healthAggregator = health.NewHealthAggregator()

	// Register one syncer/cache pipeline per supported syncer type, Felix first.
	datastore := t.DatastoreClient
	t.addSyncerPipeline(syncproto.SyncerTypeFelix, datastore.FelixSyncerByIface)
	t.addSyncerPipeline(syncproto.SyncerTypeBGP, datastore.BGPSyncerByIface)
}
2.1.1 addSyncerPipeline 函數
calc.NewSyncerCallbacksDecoupler() 主要是將數據從 syncer 發給 validator,實現在 pkg/calc/async_decoupler.go
calc.NewSyncerCallbacksDecoupler 主要是將數據從 validator 到 cache
snapcache.New 創建 snapshot cache,具體在第 3 章節講解
// addSyncerPipeline assembles one syncer -> validator -> snapshot-cache pipeline
// for syncerType and registers it on the daemon. Nothing is started here.
//
// newSyncer is invoked with the decoupler that feeds the validator and returns the
// Syncer that will drive the pipeline. The resulting pipeline is appended to
// t.SyncerPipelines and its cache is stored in t.CachesBySyncerType[syncerType].
func (t *TyphaDaemon) addSyncerPipeline(
	syncerType syncproto.SyncerType,
	newSyncer func(callbacks bapi.SyncerCallbacks) bapi.Syncer,
) {
	// Datastore side: the syncer pushes its updates into the first decoupler.
	toValidator := calc.NewSyncerCallbacksDecoupler()
	datastoreSyncer := newSyncer(toValidator)
	log.Debugf("Created Syncer: %#v", datastoreSyncer)

	// The validation filter sits between the syncer and the cache and forwards
	// into the second decoupler.
	toCache := calc.NewSyncerCallbacksDecoupler()
	filter := calc.NewValidationFilter(toCache)

	// The snapshot cache stores point-in-time copies of the datastore contents.
	cacheConfig := snapcache.Config{
		MaxBatchSize:     t.ConfigParams.SnapshotCacheMaxBatchSize,
		HealthAggregator: t.healthAggregator,
	}
	snapshots := snapcache.New(cacheConfig)

	t.SyncerPipelines = append(t.SyncerPipelines, &syncerPipeline{
		Type:              syncerType,
		Syncer:            datastoreSyncer,
		SyncerToValidator: toValidator,
		Validator:         filter,
		ValidatorToCache:  toCache,
		Cache:             snapshots,
	})
	t.CachesBySyncerType[syncerType] = snapshots
}
2.2 TyphaDaemon Start 函數
p.Cache.Start 第 3.1 章節講解
// Start brings up every stage of the pipeline in order: the syncer, the
// syncer-to-validator decoupler, the validator-to-cache decoupler and finally the
// cache. It returns once everything has been launched.
//
// cxt bounds the lifetime of the background work: both decoupler goroutines are
// started with SendToContext so that they return when cxt is done, matching the
// cache, which is also started with cxt. Previously they were started with
// SendTo, which is not given the context and so could not be stopped through it.
func (p syncerPipeline) Start(cxt context.Context) {
	logCxt := log.WithField("syncerType", p.Type)

	logCxt.Info("Starting syncer")
	p.Syncer.Start()

	logCxt.Info("Starting syncer-to-validator decoupler")
	go p.SyncerToValidator.SendToContext(cxt, p.Validator)

	logCxt.Info("Starting validator-to-cache decoupler")
	go p.ValidatorToCache.SendToContext(cxt, p.Cache)

	logCxt.Info("Starting cache")
	p.Cache.Start(cxt)

	logCxt.Info("Started syncer pipeline")
}
2.2.1 從 syncer 發送給 validator 的 channel
// SendToContext pumps messages from the decoupler's channel into sink until cxt
// is done. An api.SyncStatus is passed to sink.OnStatusUpdated and a []api.Update
// batch is passed to sink.OnUpdates; a value of any other type is ignored.
func (a *SyncerCallbacksDecoupler) SendToContext(cxt context.Context, sink api.SyncerCallbacks) {
	for {
		select {
		case <-cxt.Done():
			logrus.WithError(cxt.Err()).Info("Context asked us to stop")
			return
		case msg := <-a.c:
			if status, isStatus := msg.(api.SyncStatus); isStatus {
				sink.OnStatusUpdated(status)
			} else if batch, isBatch := msg.([]api.Update); isBatch {
				sink.OnUpdates(batch)
			}
		}
	}
}
2.3 syncserver Start 函數
serve 函數內容比較多,但是邏輯比較簡單:處理請求,緩存連接 conn,最後調用 connection.handle 處理
// Start launches the server's two background goroutines, serve and
// governNumberOfConnections, passing cxt to each, and returns immediately.
func (s *Server) Start(cxt context.Context) {
// Account for both goroutines on s.Finished before either of them is launched.
// NOTE(review): presumably Finished is a sync.WaitGroup that each goroutine marks
// Done on exit — confirm against the Server definition.
s.Finished.Add(2)
go s.serve(cxt)
go s.governNumberOfConnections(cxt)
}
2.3.1 connection.handle 函數
doHandshake處理握手請求,類型 MsgClientHello
sendSnapshotAndUpdatesToClient 異步發送 snapshot 更新
sendPingsToClient 定期的 ping-pong 請求
2.4 ConnectionRebalancingMode(連接重平衡模式)爲 kubernetes
if t.ConfigParams.ConnectionRebalancingMode == "kubernetes" {
log.Info("Kubernetes connection rebalancing is enabled, starting k8s poll goroutine.")
k8sAPI := k8s.NewK8sAPI()
ticker := jitter.NewTicker(
t.ConfigParams.K8sServicePollIntervalSecs,
t.ConfigParams.K8sServicePollIntervalSecs/10)
go k8s.PollK8sForConnectionLimit(cxt, t.ConfigParams, ticker.C, k8sAPI, t.Server)
}
log.Info("Started the datastore Syncer/cache layer/server.")
3. snapcache New 函數
Ctrie 中的每個節點都有一個和它相關聯的同伴節點,當進行快照時,root節點都會被拷貝到一個新的節點,當樹中的節點被訪問時,也會被惰性拷貝到新的節點(持久化數據結構),這樣的快照操作是常數耗時的。
Ctrie 跟同步 map 或者跳躍表比起來,插入操作更耗時一些,因爲尋址操作變多了。Ctrie 真正的優勢是內存消耗:跟大多數 Hash 表不同,它佔用的內存始終只取決於樹中 keys 的數量。另一個性能優勢就是它可以在常量時間內完成可線性化(linearizable)的快照。
// New constructs a snapshot Cache from config after applying the config defaults.
//
// It creates the master trie and an initial Breadcrumb holding a read-only
// snapshot of that freshly created trie. It also sizes the input channel at twice
// MaxBatchSize and sets up the wake-up and health tickers. If a HealthAggregator
// is configured the cache registers itself as a reporter, and in all cases one
// health report is issued before the cache is returned. The main loop is not
// started here.
func New(config Config) *Cache {
	config.ApplyDefaults()

	masterKVs := ctrie.New(nil /*default hash factory*/)
	breadcrumbCond := sync.NewCond(&sync.Mutex{})

	// The first breadcrumb shares the condition variable with the cache.
	initial := &Breadcrumb{
		Timestamp: time.Now(),
		nextCond:  breadcrumbCond,
		KVs:       masterKVs.ReadOnlySnapshot(),
	}

	cache := &Cache{
		config:            config,
		inputC:            make(chan interface{}, config.MaxBatchSize*2),
		breadcrumbCond:    breadcrumbCond,
		kvs:               masterKVs,
		currentBreadcrumb: (unsafe.Pointer)(initial),
		wakeUpTicker:      jitter.NewTicker(config.WakeUpInterval, config.WakeUpInterval/10),
		healthTicks:       time.NewTicker(healthInterval).C,
	}

	if aggregator := config.HealthAggregator; aggregator != nil {
		aggregator.RegisterReporter(healthName, &health.HealthReport{Live: true, Ready: true}, healthInterval*2)
	}
	cache.reportHealth()

	return cache
}
3.1 Cache Start 函數
// Start kicks off the cache's main loop on its own goroutine and returns at once;
// the loop is handed ctx.
func (c *Cache) Start(ctx context.Context) {
	go func() {
		c.loop(ctx)
	}()
}
// loop is the cache's main loop. Each pass first blocks in
// fillBatchFromInputQueue to collect a batch of pending updates and then
// publishes them as new Breadcrumb(s). The loop logs and exits as soon as
// fillBatchFromInputQueue returns an error.
func (c *Cache) loop(ctx context.Context) {
	for {
		// Blocks until there is input, then opportunistically slurps up a limited
		// number of further pending updates into the pendingXXX fields.
		err := c.fillBatchFromInputQueue(ctx)
		if err != nil {
			log.WithError(err).Error("Snapshot main loop exiting.")
			return
		}

		c.publishBreadcrumbs()
	}
}
3.1.1 fillBatchFromInputQueue 函數
處理 Cache 的 inputC channel,把類型爲 update 的存入 pending,再由 publishBreadcrumbs 處理 pending 的 update
// fillBatchFromInputQueue waits for some input on the input channel, then opportunistically
// pulls as much as possible from the channel. Input is stored in the pendingXXX fields for
// the next stage of processing.
func (c *Cache) fillBatchFromInputQueue(ctx context.Context) error {
batchSize := 0
storePendingUpdate := func(obj interface{}) {
log.Debug("Waiting for next input...")
select {
case obj := <-c.inputC:
log.WithField("update", obj).Debug("Got first update, peeking...")
storePendingUpdate(obj)
batchLoop:
for batchSize < c.config.MaxBatchSize {
select {
case obj = <-c.inputC:
storePendingUpdate(obj)
case <-ctx.Done():
log.WithError(ctx.Err()).Info("Context is done. Stopping.")
return ctx.Err()
default:
break batchLoop
}
}
log.WithField("numUpdates", batchSize).Debug("Finished reading batch.")
3.1.2 publishBreadcrumbs 函數
更新 master Ctrie 發佈新的 Breadcrumb,其中包含只讀 snapshot
4. watcherSyncer
watcherSyncer 結構體實現了 api.Syncer 接口,兩個方法,Start() 和 Stop()
// watcherSyncer implements the api.Syncer interface. It runs one watcherCache per
// resource type and consolidates what they emit on the shared results channel.
type watcherSyncer struct {
// status is the current sync status; processResult compares it against
// api.WaitForDatastore before moving to api.ResyncInProgress.
status api.SyncStatus
// watcherCaches holds the per-resource-type caches; run starts one goroutine
// for each entry.
watcherCaches []*watcherCache
// results is the untyped channel that run's main loop ranges over.
results chan interface{}
// NOTE(review): presumably counts the watcher caches that have reported
// in-sync — confirm against the code that updates it.
numSynced int
// NOTE(review): presumably the sink that status changes and consolidated
// updates are delivered to — confirm in sendStatusUpdate/sendUpdates.
callbacks api.SyncerCallbacks
// wgwc tracks the watcher cache goroutines: run calls Add(1) for each one and
// each goroutine calls Done after wc.run returns.
wgwc *sync.WaitGroup
// wgws is incremented once at the start of run.
// NOTE(review): the matching Done is not visible here — confirm where it happens.
wgws *sync.WaitGroup
// NOTE(review): presumably cancels the context handed to run — confirm.
cancel context.CancelFunc
}
4.1 Start() 函數調用的 run 方法
run 函數實現了 syncer 的主要邏輯:首先發送 wait-for-ready 狀態,等待連接到 datastore。目前有兩個 watcherSyncer,felix 與 bgp。
對 syncer 註冊的每一種 resourceType,都會啓動一個 goroutine 執行對應 watcherCache 的 run,第 5.1 章節講解
// run implements the main syncer loop that loops forever receiving watch events and translating
// to syncer updates.
func (ws *watcherSyncer) run(ctx context.Context) {
log.Debug("Sending initial status event and starting watchers")
ws.wgws.Add(1)
ws.sendStatusUpdate(api.WaitForDatastore)
for _, wc := range ws.watcherCaches {
ws.wgwc.Add(1)
go func(wc *watcherCache) {
wc.run(ctx)
log.Debug("Watcher cache run completed")
ws.wgwc.Done()
}(wc)
}
4.2 watcherSyncer 處理 channel 中 results
// Main event loop: block on ws.results, fold the received result into the pending
// updates, then drain up to maxUpdatesToConsolidate further results without
// blocking before sending the consolidated batch. The loop ends when ws.results
// is closed.
log.Info("Starting main event processing loop")
var updates []api.Update
for result := range ws.results {
// Process the data - this will append the data in subsequent calls, and action
// it if we hit a non-update event.
// NOTE(review): `:=` declares a new loop-scoped `updates` that shadows the one
// declared before the loop. The outer variable is only read (as the argument on the
// right-hand side) and is never reassigned, so the value returned by sendUpdates
// below is not carried into the next iteration. Confirm whether `=` was intended.
updates := ws.processResult(updates, result)
// Append results into the one update until we either flush the channel or we
// hit our fixed limit per update.
consolidatationloop:
for ii := 0; ii < maxUpdatesToConsolidate; ii++ {
select {
case next := <-ws.results:
updates = ws.processResult(updates, next)
default:
// Nothing else is queued right now: stop consolidating and send what we have.
break consolidatationloop
}
}
// Send whatever has been consolidated before we loop and hit the blocking
// receive again.
updates = ws.sendUpdates(updates)
}
4.3 processResult 函數處理 result channel
不採取立刻更新操作,分組發送 felix 更新,類型有 api.Update error api.SyncStatus
// Process a result from the result channel. We don't immediately action updates, but
// instead start grouping them together so that we can send a larger single update to
// Felix.
func (ws *watcherSyncer) processResult(updates []api.Update, result interface{}) []api.Update {
// Switch on the result type.
switch r := result.(type) {
case []api.Update:
// This is an update. If we don't have previous updates then also check to see
// if we need to shift the status into Resync.
// We append these updates to the previous if there were any.
if len(updates) == 0 && ws.status == api.WaitForDatastore {
ws.sendStatusUpdate(api.ResyncInProgress)
}
updates = append(updates, r...)
5. watcherCache
主要是 results channel,無類型,只接收 error,api.Update,api.SyncStatus
// The watcherCache provides watcher/syncer support for a single key type in the
// backend. These results are sent to the main WatcherSyncer on a buffered "results"
// channel. To ensure the order of events is received correctly by the main WatcherSyncer,
// we send all notification types in this channel. Note that because of this the results
// channel is untyped - however the watcherSyncer only expects one of the following
// types:
// - An error
// - A batch of updates (watcherSyncer.processResult switches on []api.Update)
// - A api.SyncStatus (only for the very first InSync notification)
type watcherCache struct {
// logger is the entry used for this cache's debug/info logging.
logger *logrus.Entry
// client is the backend client used for the List and Watch calls.
client api.Client
// watch is the active watcher; events are read from its ResultChan().
watch api.WatchInterface
// resources is the current set of cached entries; a full resync replaces it
// with a fresh empty map before processing the List results.
resources map[string]cacheEntry
// oldResources receives the previous resources map at the start of a full
// resync so that entries not seen again in the List can be sent as deletes.
oldResources map[string]cacheEntry
// results is the send-only end of the channel shared with the watcherSyncer.
results chan<- interface{}
// NOTE(review): presumably records whether the first in-sync notification has
// already been sent — confirm in finishResync.
hasSynced bool
// NOTE(review): presumably a count of errors seen — its use is not visible
// here; confirm.
errors int
// resourceType supplies the ListInterface used for List/Watch and an optional
// UpdateProcessor that is notified when a resync starts.
resourceType ResourceType
// currentWatchRevision is the revision to watch from. An empty value triggers
// a full resync, after which it is set to the revision returned by List.
currentWatchRevision string
}
5.1 watcherCache run 函數
// run creates the watcher and loops indefinitely reading from the watcher.
func (wc *watcherCache) run(ctx context.Context) {
wc.logger.Debug("Watcher cache starting, start initial sync processing")
wc.resyncAndCreateWatcher(ctx)
5.1.1 resyncAndCreateWatcher 函數,循環實現resync,直到成功完成了重新同步啓動了watcher
// resyncAndCreateWatcher loops performing resync processing until it successfully
// completes a resync and starts a watcher.
func (wc *watcherCache) resyncAndCreateWatcher(ctx context.Context) {
// The passed in context allows a resync to be stopped mid-resync. The resync should be stopped as quickly as
// possible, but there should be usable data available in wc.resources so that delete events can be sent.
// The strategy is to
// - cancel any long running functions calls made from here, i.e. pass ctx to the client.list() calls
// - but if it finishes, then ensure that the listing gets processed.
// - cancel any sleeps if the context is cancelled
5.1.1.1 如果沒有版本,那就實行全部同步
// If we don't have a currentWatchRevision then we need to perform a full resync.
performFullResync := wc.currentWatchRevision == ""
5.1.1.2 實行全部同步
syncer | resourcetype | 實現路徑 | |
felix | configUpdateProcessor | github.com/projectcalico/libcalico-go/lib/backend/syncersv1/updateprocessors/configurationprocessor.go | |
felix | |||
UpdateProcessor.OnSyncerStarting 主要是開關功能
client.List 從後端獲取該資源,本文使用的是 kubernetes 後端
handleWatchListEvent 對每一 KV,調用 UpdateProcessor的 Process 方法,對於符合的則調用 handleAddedOrModifiedUpdate 更新 KV,其實就是往 channel 塞值 wc.results <- []api.Update
if performFullResync {
// Notify the converter that we are resyncing.
if wc.resourceType.UpdateProcessor != nil {
wc.logger.Debug("Trigger converter resync notification")
wc.resourceType.UpdateProcessor.OnSyncerStarting()
}
// Start the sync by Listing the current resources.
l, err := wc.client.List(ctx, wc.resourceType.ListInterface, "")
// Once this point is reached, it's important not to drop out if the context is cancelled.
// Move the current resources over to the oldResources
wc.oldResources = wc.resources
wc.resources = make(map[string]cacheEntry, 0)
// Send updates for each of the resources we listed - this will revalidate entries in
// the oldResources map.
for _, kvp := range l.KVPairs {
wc.handleWatchListEvent(kvp)
}
// We've listed the current settings. Complete the sync by notifying the main WatcherSyncer
// go routine (if we haven't already) and by sending deletes for the old resources that were
// not acknowledged by the List. The oldResources will be empty after this call.
wc.finishResync()
// Store the current watch revision. This gets updated on any new add/modified event.
wc.currentWatchRevision = l.Revision
}
5.1.1.3 從現在的版本進行 watch 操作
// And now start watching from the revision returned by the List, or from a previous watch event
// (depending on whether we were performing a full resync).
w, err := wc.client.Watch(ctx, wc.resourceType.ListInterface, wc.currentWatchRevision)
5.1.2 從 watch channel 同步數據
類型包括 新增 修改 刪除 以及錯誤,主要是靠 handleWatchListEvent 處理,比較容易看懂,就是新增的更新刪除的 KV 處理到 results channel,由 watcherSyncer 處理
case event, ok := <-wc.watch.ResultChan():
if !ok {
// If the channel is closed then resync/recreate the watch.
wc.logger.Info("Watch channel closed by remote - recreate watcher")
wc.resyncAndCreateWatcher(ctx)
continue
}
wc.logger.WithField("RC", wc.watch.ResultChan()).Debug("Reading event from results channel")
// Handle the specific event type.
switch event.Type {
case api.WatchAdded, api.WatchModified:
總結:
typha 啓動從參數,環境變量,文件讀取配置參數,創建後端連接(kubernetes / etcd)
註冊了兩個 pipeline,創建 felix bgp syncer,每一個都有註冊了 resourceType
watcherCache,對 resource 進行同步,list watch,扔到 channel results
watcherSyncer 處理 channel results