Kafak的吞吐量爲何這麼高呢?

{"type":"doc","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":" Producer的主要功能就是向broker中某個topic的某個分區發送消息,主要由分區器(partitioner)實現。如何選擇分區,然後高吞吐的可靠發送到broker是producer的重點。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"size","attrs":{"size":14}},{"type":"strong","attrs":{}}],"text":"發送消息","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"       首先,Kafka producer提供了一個默認的分區器。如果消息指定了key則分區器會根據key的哈希值來選擇目標分區,若該消息未指定key,則分區器使用輪詢的方式確認目標分區,也可自定義分區策略。確認分區後分區器需要尋找該分區對應的leader所在的broker,然後向leader寫入數據。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"       當producer發送請求給broker後,broker響應結果的超時時間,默認30s。具體流程如圖:","attrs":{}}]},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/6a/6aa0fda0e3524ac236507f865b444743.png","alt":null,"title":"","style":[{"key":"width","value":"50%"},{"key":"bordertype","value":"none"}],"href":"","fromPaste":false,"pastePass":false}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":" ","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"      發送流程如下:","attrs":{}}]},{"type":"numberedlist","attrs":{"start":null,"normalizeStart":1},"content":[{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":1,"align":null,"origin":null},"content":[{"type":"text","text":"首先要構造一個 ProducerRecord 對象,包含Topic、Partition、Key。","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":2,"align":null,"origin":null},"content":[{"type":"text","text":"調用send()方法進行消息發送。","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":3,"align":null,"origin":null},"content":[{"type":"text","text":"因爲消息要到網絡上進行傳輸,所以必須進行序列化。","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":4,"align":null,"origin":null},"content":[{"type":"text","text":"數據傳到分區器,如果 ProducerRecord 對象已指定了分區,直接把指定的分區返回;如果沒有,那麼分區器會根據 Key 來選擇一個分區。","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":5,"align":null,"origin":null},"content":[{"type":"text","text":"這條記錄會被添加到一個記錄批次裏面,批次裏所有的消息會被髮送到相同的topic和partition,然後由一個獨立的線程把這些記錄批次發送到相應的broker上。","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":6,"align":null,"origin":null},"content":[{"type":"text","text":"Broker成功接收到消息,表示發送成功,返回消息的RecordMetadata元數據(包括Topic、Partiton和分區中的偏移量),發送失敗可以選擇重試或者直接拋出異常。","attrs":{}}]}],"attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":" ","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"size","attrs":{"size":14}},{"type":"strong","attrs":{}}],"text":"主要配置","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"bootstrap.servers","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"該屬性指定 brokers 的地址清單。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"buffer.memory","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"該參數用來設置producer端內存緩衝區的大小,默認值爲32M。如果應用程序發送消息的速度超過發送到服務器的速度,那麼會導致生產者內存不足,導致send()方法會被阻塞,如果阻塞的時間超過了max.block.ms配置的時長則會拋出異常","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"batch.size","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"該參數用於設置批次發送的閾值,默認16KB","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"etires","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"該參數用於當生產者發送消息到broker失敗時的重試次數,默認每隔(retry.backoff.on)100ms重試","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"request.timeout.ms","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"該參數用於當producer發送請求給broker後,broker響應結果的超時時間,默認30s","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":" ","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"size","attrs":{"size":14}},{"type":"strong","attrs":{}}],"text":"發送方式","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"直接發送","attrs":{}}]}],"attrs":{}}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"該方式是最簡單的發送數據方式,不管消息是否可靠到達,本質上也是一種異步方式吞吐量最高,但無法保證消息的可靠性。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"bulletedlist","content":[{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"同步發送","attrs":{}}]}],"attrs":{}}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"可以明確知道消息發送的結果,然後對結果進行處理,由於同步的方式會阻塞,只有當消息通過get返回future對象時,纔會繼續下一條消息的發送,導致吞吐量下降。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"bulletedlist","content":[{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"異步發送","attrs":{}}]}],"attrs":{}}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"在調用send方法發送消息的同時指定一個回調函數,broker在返回響應時會調用該回調函數,通過回調函數能夠對異常情況進行處理,比如記錄異常日誌再統一處理。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":" ","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"size","attrs":{"size":14}},{"type":"strong","attrs":{}}],"text":"冪等性","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"       Producer 的冪等性指的是當發送同一條消息時,數據在broker端只會被持久化一次,數據不丟不重,也就是exactly-once,但是在分佈式系統中,出現網絡分區是不可避免的,例如以下情況:","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"kafka broker在回覆ack時,出現網絡故障或者是full gc導致ack timeout,producer將會重發。","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"producer掛了,新的producer並沒有old producer的狀態數據。","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"producer發送rpc到broker異常,無法得知消息是否寫入只能重試。","attrs":{}}]}],"attrs":{}}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":" ","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":" 冪等性的目的就是爲了解決重複的問題at least once + 冪等 = exactly once,冪等性是通過兩個關鍵信息保證:","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"PID","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"每個producer初始化時會由broker分配一個唯一的PID來標識唯一的producer,producer每次啓動都不一樣。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"sequence numbers","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"Broker端通過接收到message的PID和sequence numbers 信息進行校驗。最新版本中可通過設置enable.idempotence = true開啓冪等性,但是Kafka的冪等性是有條件的:","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"只能保證producer在單個會話內不丟不重,如果producer出現意外掛掉再重啓是無法保證的,無法做到跨會話級別的不丟不重。","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"冪等性不能跨多個partition,只能保證單個 partition 內的冪等性。","attrs":{}}]}],"attrs":{}}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"  ","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"size","attrs":{"size":14}},{"type":"strong","attrs":{}}],"text":"事務性","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":" 冪等性提供了單會話單partition的Exactly-Once語義的實現,正是因爲冪等性不提供跨partition和跨會話場景下的保證,因此需要一種更強的事務保證,能夠原子處理多個partition的寫入操作,數據要麼全部寫入成功,要麼全部失敗,kafka是事務性類似於二階段提交,總的來說就是能夠實現【跨分區的原子寫入】。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":" kafka的事務性可以保證:","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"跨會話的冪等性寫入,即使中間故障,恢復後依然可以保持冪等性。","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"跨會話的事務恢復,如果一個應用實例掛了,啓動的下一個實例依然可以保證上一個事務完成。","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"跨多個Topic-Partition的冪等性寫入,Kafka 可以保證跨多個 Topic-Partition 的數據要麼全部寫入成功,要麼全部失敗,不會出現中間狀態。","attrs":{}}]}],"attrs":{}}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":" 事務屬性實現前提是冪等性,即配置事務屬性還必須還得配置冪等性,但是冪等性是可以獨立使用的,不需要依賴事務屬性。如果需要保證消息的事務性,需保證如下配置:","attrs":{}}]},{"type":"bulletedlist","content":[{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"enable.idempotence = true,transactional.id不設置:只支持冪等性。","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"enable.idempotence = true,transactional.id設置:支持事務屬性和冪等性。","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"enable.idempotence = false,transactional.id不設置:沒有事務屬性和冪等性。","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"enable.idempotence = false,transactional.id設置:無法獲取到PID,此時會報錯。","attrs":{}}]}],"attrs":{}}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":" ","attrs":{}}]},{"type":"horizontalrule","attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":"center","origin":null},"content":[{"type":"text","marks":[{"type":"size","attrs":{"size":16}}],"text":"更多文章請加入公衆號","attrs":{}}]},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/e9/e907e7f7f6deb51b782e7173adc49b56.jpeg","alt":null,"title":"","style":[{"key":"width","value":"50%"},{"key":"bordertype","value":"none"}],"href":"","fromPaste":false,"pastePass":false}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}}]}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章