生產者第二節:Sender 線程
Sender線程,發送流程:
1.根據RecordAccumulator緩存情況,篩選出可以向哪些node節點發送消息,由ready()方法得到。
2.根據生產者和節點的連接情況(由NetworkClient管理),過濾Node節點。
3.生成相應的請求,每個Node節點,只生成一個請求。
4.調用NetworkClient 將請求發送出去。
一,Sender
如果沒有添加事務,那麼就直接走sendProducerData()方法
/**
 * Runs one iteration of the sender: optional transaction handling (elided in this
 * excerpt), then sends the ready producer data and polls the network client.
 *
 * @param now current timestamp, forwarded unchanged to the send and poll calls
 *            (presumably milliseconds; confirm against the caller)
 */
void run(long now) {
    if (transactionManager != null) {
        // Transaction-related handling is elided in this excerpt.
        ...
    }
    // The value returned by sendProducerData() is used as the poll timeout. [step into]
    long pollTimeout = sendProducerData(now);
    client.poll(pollTimeout, now);
}
1.sendProducerData()
/**
 * Picks the batches that are ready to go, fails the expired ones, and hands the rest
 * to {@code sendProduceRequests}.
 *
 * @param now current timestamp, forwarded to the accumulator and the network client
 * @return the timeout the caller should pass to the next poll: the smaller of the
 *         accumulator's next-ready-check delay and the shortest connection delay of
 *         the nodes that were not ready
 */
private long sendProducerData(long now) {
    Cluster cluster = metadata.fetch();

    // Step 1: ask the accumulator which partitions have data ready to send. [step into]
    RecordAccumulator.ReadyCheckResult readiness = this.accumulator.ready(cluster, now);

    // Topics with an unknown partition leader force a metadata update.
    if (!readiness.unknownLeaderTopics.isEmpty()) {
        for (String topic : readiness.unknownLeaderTopics) {
            this.metadata.add(topic);
        }
        this.metadata.requestUpdate();
    }

    // Remove every node the client is not ready to send to, remembering the
    // shortest connection delay among the removed nodes.
    long notReadyTimeout = Long.MAX_VALUE;
    for (Iterator<Node> it = readiness.readyNodes.iterator(); it.hasNext(); ) {
        Node candidate = it.next();
        if (this.client.ready(candidate, now)) {
            continue;
        }
        it.remove();
        notReadyTimeout = Math.min(notReadyTimeout, this.client.connectionDelay(candidate, now));
    }

    // Step 2: drain the batches for the remaining nodes, keyed by node id. [step into]
    Map<Integer, List<ProducerBatch>> drained =
            this.accumulator.drain(cluster, readiness.readyNodes, this.maxRequestSize, now);
    if (guaranteeMessageOrder) {
        // Mute every partition that just had a batch drained.
        for (List<ProducerBatch> perNode : drained.values()) {
            for (ProducerBatch batch : perNode) {
                this.accumulator.mutePartition(batch.topicPartition);
            }
        }
    }

    // Fail the batches the accumulator reports as expired.
    List<ProducerBatch> expired = this.accumulator.expiredBatches(this.requestTimeout, now);
    if (!expired.isEmpty()) {
        log.trace("Expired {} batches in accumulator", expired.size());
    }
    for (ProducerBatch stale : expired) {
        failBatch(stale, -1, NO_TIMESTAMP, stale.timeoutException(), false);
        if (transactionManager != null && stale.inRetry()) {
            // This ensures that no new batches are drained until the current in flight batches are fully resolved.
            transactionManager.markSequenceUnresolved(stale.topicPartition);
        }
    }

    sensors.updateProduceRequestMetrics(drained);
    long pollTimeout = Math.min(readiness.nextReadyCheckDelayMs, notReadyTimeout);

    // Step 3: build and send one produce request per node. [step into]
    sendProduceRequests(drained, now);
    return pollTimeout;
}
1.1 RecordAccumulator:ready()
獲得recordAccumulator中 準備好的 分區,並且要找到該分區的 leader node。
/**
 * Determines which nodes have at least one partition with data ready to send.
 *
 * <p>For every partition queue in {@code batches} the leader is looked up in the
 * cluster. A partition with pending batches but no known leader contributes its topic
 * to the unknown-leader set. Otherwise the first batch of the queue is inspected
 * (without removing it) and the leader is added to the ready set when the batch is
 * sendable and not backing off.
 *
 * @param cluster cluster view used to resolve the leader of each partition
 * @param nowMs   current time in milliseconds
 * @return the ready nodes, the delay until the next readiness check, and the topics
 *         whose partition leader is unknown
 */
public ReadyCheckResult ready(Cluster cluster, long nowMs) {
    // Nodes we can send to.
    Set<Node> readyNodes = new HashSet<>();
    // Smallest remaining wait among the partitions that are not ready yet.
    long nextReadyCheckDelayMs = Long.MAX_VALUE;
    Set<String> unknownLeaderTopics = new HashSet<>();

    // NOTE(review): presumably true when callers are queued waiting on the buffer pool - confirm.
    boolean exhausted = this.free.queued() > 0;
    for (Map.Entry<TopicPartition, Deque<ProducerBatch>> entry : this.batches.entrySet()) {
        TopicPartition part = entry.getKey();
        Deque<ProducerBatch> deque = entry.getValue();

        Node leader = cluster.leaderFor(part);
        synchronized (deque) {
            if (leader == null && !deque.isEmpty()) {
                // Data is waiting but the leader is unknown: report the topic.
                unknownLeaderTopics.add(part.topic());
            } else if (!readyNodes.contains(leader) && !muted.contains(part)) {
                // Look at the head of the queue; peekFirst() does not remove it.
                ProducerBatch batch = deque.peekFirst();
                if (batch != null) {
                    long waitedTimeMs = batch.waitedTimeMs(nowMs);
                    boolean backingOff = batch.attempts() > 0 && waitedTimeMs < retryBackoffMs;
                    long timeToWaitMs = backingOff ? retryBackoffMs : lingerMs;
                    boolean full = deque.size() > 1 || batch.isFull();
                    boolean expired = waitedTimeMs >= timeToWaitMs;
                    // Any one of these five conditions makes the batch sendable.
                    boolean sendable = full || expired || exhausted || closed || flushInProgress();
                    if (sendable && !backingOff) {
                        readyNodes.add(leader);
                    } else {
                        // Not ready yet: remember how much longer this batch has to wait,
                        // clamped at zero, so the next check is not scheduled too late.
                        long timeLeftMs = Math.max(timeToWaitMs - waitedTimeMs, 0);
                        nextReadyCheckDelayMs = Math.min(timeLeftMs, nextReadyCheckDelayMs);
                    }
                }
            }
        }
    }
    return new ReadyCheckResult(readyNodes, nextReadyCheckDelayMs, unknownLeaderTopics);
}
1.2.RecordAccumulator:drain()
根據node ,返回map<Integer ,List<xxBatch>> ,key是node id。
/**
 * Drains batches for the given nodes and groups them by node id.
 *
 * <p>The parameter list is elided in this excerpt; the body reads {@code cluster},
 * {@code nodes}, {@code maxSize} and {@code now}. For each node the partitions hosted
 * on it are visited once, starting at {@code drainIndex}, and at most the first batch
 * of each partition queue is taken.
 *
 * @return a map from node id to the batches drained for that node; an empty map when
 *         {@code nodes} is empty
 */
public Map<Integer, List<ProducerBatch>> drain(...) {
    if (nodes.isEmpty())
        return Collections.emptyMap();
    Map<Integer, List<ProducerBatch>> batches = new HashMap<>();
    for (Node node : nodes) { // one pass per node
        int size = 0;
        // Partitions located on the current node.
        List<PartitionInfo> parts = cluster.partitionsForNode(node.id());
        List<ProducerBatch> ready = new ArrayList<>();
        /* to make starvation less likely this loop doesn't start at 0 */
        // drainIndex records the position where the previous drain stopped; compute the start position from it.
        int start = drainIndex = drainIndex % parts.size();
        do {
            // Partition details at the current index.
            PartitionInfo part = parts.get(drainIndex);
            TopicPartition tp = new TopicPartition(part.topic(), part.partition());
            // Only proceed if the partition has no in-flight batches.
            if (!muted.contains(tp)) {
                // Queue of batches waiting to be sent for this partition.
                Deque<ProducerBatch> deque = getDeque(tp);
                if (deque != null) {
                    synchronized (deque) { // inspect the head of the queue
                        ProducerBatch first = deque.peekFirst();
                        if (first != null) {
                            boolean backoff = first.attempts() > 0 && first.waitedTimeMs(now) < retryBackoffMs;
                            // Only drain the batch if it is not during backoff period.
                            if (!backoff) {
                                if (size + first.estimatedSizeInBytes() > maxSize && !ready.isEmpty()) {
                                    // Size limit reached (typically the size of one request): leave the
                                    // do-while loop. Because of the break, drainIndex is not advanced here.
                                    break;
                                } else {
                                    ProducerIdAndEpoch producerIdAndEpoch = null;
                                    boolean isTransactional = false;
                                    ...
                                    // Remove the head batch from the queue and add it to ready.
                                    ProducerBatch batch = deque.pollFirst();
                                    ...
                                    batch.close();
                                    size += batch.records().sizeInBytes();
                                    ready.add(batch);
                                    batch.drained(now);
                                }
                            }
                        }
                    }
                }
            }
            this.drainIndex = (this.drainIndex + 1) % parts.size();
        } while (start != drainIndex);
        batches.put(node.id(), ready);
    }
    return batches;
}
1.3.創建request體
/**
 * Sends one produce request per node.
 *
 * @param collated drained batches keyed by destination node id
 * @param now      current timestamp, forwarded to each request
 */
private void sendProduceRequests(Map<Integer, List<ProducerBatch>> collated, long now) {
    for (Map.Entry<Integer, List<ProducerBatch>> perNode : collated.entrySet()) {
        int nodeId = perNode.getKey();
        List<ProducerBatch> nodeBatches = perNode.getValue();
        sendProduceRequest(now, nodeId, acks, requestTimeout, nodeBatches);
    }
}
一個node 創建一個request體,進入:
/**
 * Builds a single produce request from the given batches and sends it to one node.
 *
 * @param now         current timestamp, forwarded to the client request and the send call
 * @param destination id of the target node
 * @param acks        acknowledgement setting; a response is expected unless it is 0
 * @param timeout     request timeout handed to the request builder
 * @param batches     batches destined for the node; nothing is sent when empty
 */
private void sendProduceRequest(long now, int destination, short acks, int timeout, List<ProducerBatch> batches) {
    if (batches.isEmpty()) {
        return;
    }

    // find the minimum magic version used when creating the record sets
    byte minUsedMagic = apiVersions.maxUsableProduceMagic();
    for (ProducerBatch candidate : batches) {
        minUsedMagic = candidate.magic() < minUsedMagic ? candidate.magic() : minUsedMagic;
    }

    // Index the record payloads and the batches themselves by topic-partition.
    Map<TopicPartition, MemoryRecords> payloadByPartition = new HashMap<>(batches.size());
    final Map<TopicPartition, ProducerBatch> batchByPartition = new HashMap<>(batches.size());
    for (ProducerBatch batch : batches) {
        MemoryRecords payload = batch.records();
        if (!payload.hasMatchingMagic(minUsedMagic)) {
            // Down-convert records whose magic does not match the chosen one.
            payload = batch.records().downConvert(minUsedMagic, 0, time).records();
        }
        TopicPartition partition = batch.topicPartition;
        payloadByPartition.put(partition, payload);
        batchByPartition.put(partition, batch);
    }

    String transactionalId = (transactionManager != null && transactionManager.isTransactional())
            ? transactionManager.transactionalId()
            : null;

    // The builder is a plain wrapper around the request fields.
    ProduceRequest.Builder requestBuilder = ProduceRequest.Builder.forMagic(
            minUsedMagic, acks, timeout, payloadByPartition, transactionalId);

    // Completion callback: routes the response to handleProduceResponse.
    RequestCompletionHandler onDone = new RequestCompletionHandler() {
        public void onComplete(ClientResponse response) {
            handleProduceResponse(response, batchByPartition, time.milliseconds());
        }
    };

    String nodeId = Integer.toString(destination);
    boolean expectResponse = acks != 0;
    // Create the client request and hand it to the network client. [step into]
    ClientRequest clientRequest = client.newClientRequest(nodeId, requestBuilder, now, expectResponse, onDone);
    client.send(clientRequest, now);
    log.trace("Sent produce request to {}: {}", nodeId, requestBuilder);
}
大概流程:遍歷Node下的batch,根據topicPartition,區分開batch,將byteBuffer提出來,然後封裝成request,這裏的request其實也就是一個簡單的封裝Pojo。
1.3.1 ProduceRequest.Builder(內部類)
/**
 * Builder for {@code ProduceRequest}; the remaining members are elided in this excerpt.
 */
public static class Builder extends AbstractRequest.Builder<ProduceRequest> {
    // Acknowledgement setting of the request.
    private final short acks;
    // Request timeout.
    private final int timeout;
    // Record payloads keyed by topic-partition (the key field of this class).
    private final Map<TopicPartition, MemoryRecords> partitionRecords;
    // Transactional id; NOTE(review): presumably null for non-transactional producers - confirm.
    private final String transactionalId;
    ...
}
1.3.2 ClientRequest
/**
 * Holder for a request handed to the network client: a destination, a request builder
 * and bookkeeping fields. The remaining members are elided in this excerpt.
 */
public final class ClientRequest {
    // Destination of the request; NOTE(review): presumably the node id as a string - confirm.
    private final String destination;
    // Builder that produces the concrete request.
    private final AbstractRequest.Builder<?> requestBuilder;
    private final int correlationId;
    private final String clientId;
    // Creation time; presumably milliseconds, going by the field name.
    private final long createdTimeMs;
    // Whether a response is expected for this request.
    private final boolean expectResponse;
    // Handler to invoke on completion.
    private final RequestCompletionHandler callback;
    ...
}
二,NetworkClient
接着sender類,我們來到了client.send()方法,自然就跳轉到了NetworkClient類中了
2.1 send()
/**
 * Queues the given request for sending by delegating to {@code doSend}.
 *
 * @param request the request to send
 * @param now     current timestamp, forwarded to {@code doSend}
 */
public void send(ClientRequest request, long now) {
    // Requests entering through this method are flagged as non-internal.
    final boolean isInternalRequest = false;
    doSend(request, isInternalRequest, now);
}
在networkClient中,從ClientRequest中,把builder取出來
/**
 * Resolves the request version for the destination node, builds the request and passes
 * it to the four-argument {@code doSend}.
 *
 * @param clientRequest     the request wrapper carrying the destination and the builder
 * @param isInternalRequest when false, the destination must pass {@code canSendRequest}
 * @param now               current timestamp, forwarded unchanged
 * @throws IllegalStateException if the request is not internal and the node is not ready
 */
private void doSend(ClientRequest clientRequest, boolean isInternalRequest, long now) {
    String nodeId = clientRequest.destination();
    if (!isInternalRequest) { // check whether a request can be sent to the given node
        if (!canSendRequest(nodeId))
            throw new IllegalStateException("Attempt to send a request to node " + nodeId + " which is not ready.");
    }
    AbstractRequest.Builder<?> builder = clientRequest.requestBuilder();
    // NOTE(review): this try has no catch/finally in the excerpt, so it does not compile
    // as shown; the handler appears to have been elided - restore it from the original source.
    try {
        NodeApiVersions versionInfo = apiVersions.get(nodeId);
        short version;
        if (versionInfo == null) {
            // No version information for this node: use the builder's latest allowed version.
            version = builder.latestAllowedVersion();
        } else {
            // Otherwise pick the latest usable version within the builder's allowed range.
            version = versionInfo.latestUsableVersion(clientRequest.apiKey(), builder.oldestAllowedVersion(),
                    builder.latestAllowedVersion());
        }
        doSend(clientRequest, isInternalRequest, now, builder.build(version)); // [step into]
    }
}
再次進入doSend()
這裏主要是構建 InFlightRequest,將其放到 inFlightRequests 容器中
/**
 * Wraps the built request in an {@code InFlightRequest}, records it in
 * {@code inFlightRequests} and hands its {@code Send} to the selector.
 *
 * @param clientRequest     the request wrapper; supplies the destination and the header
 * @param isInternalRequest not read in the visible lines (the constructor arguments are elided)
 * @param now               not read in the visible lines (the constructor arguments are elided)
 * @param request           the concrete request to serialize and send
 */
private void doSend(ClientRequest clientRequest, boolean isInternalRequest, long now, AbstractRequest request) {
    String nodeId = clientRequest.destination();
    RequestHeader header = clientRequest.makeHeader(request.version());
    Send send = request.toSend(nodeId, header);
    // Everything above prepares this step: assembling the InFlightRequest
    // (the constructor arguments are elided in this excerpt).
    InFlightRequest inFlightRequest = new InFlightRequest(...);
    this.inFlightRequests.add(inFlightRequest);
    selector.send(inFlightRequest.send); // [step into]
}
添加完畢,我們就需要跳轉到 selector,選擇器了!
補充:Send,實現類ByteBufferSend,其實就是ByteBuffer的封裝,+發送目的
/**
 * {@code Send} implementation backed by an array of {@code ByteBuffer}s plus a
 * destination. The remaining members are elided in this excerpt.
 */
public class ByteBufferSend implements Send {
    // Where this send is going.
    private final String destination;
    // NOTE(review): presumably the total number of bytes to send - confirm.
    private final int size;
    // The buffers holding the data.
    protected final ByteBuffer[] buffers;
    // NOTE(review): presumably the number of bytes still to be written - confirm.
    private int remaining;
    private boolean pending = false;
    ...
}
2.2 Selector
這裏的selector並不是 nio中的selector,它包裝了nio的selector,它的字段也是很多!看一下核心字段:
/**
 * Kafka's own selector, which holds a {@code java.nio.channels.Selector} as a field.
 * Only the core fields are shown; the remaining members are elided in this excerpt.
 */
public class Selector implements Selectable, AutoCloseable {
    // The underlying NIO selector; watches network I/O events.
    private final java.nio.channels.Selector nioSelector;
    // Maps a node id to its KafkaChannel.
    private final Map<String, KafkaChannel> channels;
    private final Set<KafkaChannel> explicitlyMutedChannels;
    private boolean outOfMemory;
    // Sends that have been written out completely. Send and NetworkReceive are the
    // write and read buffers, implemented on top of ByteBuffer.
    private final List<Send> completedSends;
    // Receives that have been read completely.
    private final List<NetworkReceive> completedReceives;
    // Holds everything read during one OP_READ event before it is moved to completedReceives.
    private final Map<KafkaChannel, Deque<NetworkReceive>> stagedReceives;
    private final Set<SelectionKey> immediatelyConnectedKeys;
    private final Map<String, KafkaChannel> closingChannels;
    private Set<SelectionKey> keysWithBufferedRead;
    // Connections found to be disconnected during one poll.
    private final Map<String, ChannelState> disconnected;
    // Connections newly established during one poll.
    private final List<String> connected;
    // Nodes for which sending a request failed.
    private final List<String> failedSends;
    ...
}
核心方法:connect(),負責創建kafkaChannel,並添加到Channels集合中保存起來。
/**
 * Opens a non-blocking socket channel to the given address, registers it with the NIO
 * selector for {@code OP_CONNECT} and builds the {@code KafkaChannel} for it.
 *
 * <p>If any step after opening the socket channel fails, the channel is closed before
 * the original exception is rethrown, so a failed connect does not leak the socket.
 *
 * @param id                id passed to {@code buildChannel} for the new connection
 * @param address           remote address to connect to
 * @param sendBufferSize    SO_SNDBUF size, or {@code Selectable.USE_DEFAULT_BUFFER_SIZE} to leave it unchanged
 * @param receiveBufferSize SO_RCVBUF size, or {@code Selectable.USE_DEFAULT_BUFFER_SIZE} to leave it unchanged
 * @throws IOException if opening, configuring, connecting or registering the channel fails
 */
public void connect(String id, InetSocketAddress address, int sendBufferSize, int receiveBufferSize) throws IOException {
    SocketChannel socketChannel = SocketChannel.open();
    try {
        // Non-blocking mode: connect() below only initiates the connection.
        socketChannel.configureBlocking(false);
        Socket socket = socketChannel.socket();
        socket.setKeepAlive(true); // enable SO_KEEPALIVE
        if (sendBufferSize != Selectable.USE_DEFAULT_BUFFER_SIZE)
            socket.setSendBufferSize(sendBufferSize); // SO_SNDBUF
        if (receiveBufferSize != Selectable.USE_DEFAULT_BUFFER_SIZE)
            socket.setReceiveBufferSize(receiveBufferSize); // SO_RCVBUF
        socket.setTcpNoDelay(true);
        // True only when the connection was established immediately; the result is
        // not used further in this excerpt.
        boolean connected = socketChannel.connect(address);
        // Register the channel with the NIO selector, interested in OP_CONNECT.
        SelectionKey key = socketChannel.register(nioSelector, SelectionKey.OP_CONNECT);
        // Build the KafkaChannel for this socket channel and key.
        KafkaChannel channel = buildChannel(socketChannel, id, key);
    } catch (IOException | RuntimeException e) {
        // Release the socket on failure; closing the channel also cancels its keys.
        try {
            socketChannel.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
回到主線路,使用kafka的selector的send()方法,發送請求request
/**
 * Queues the given send on the channel of its destination.
 *
 * <p>If the destination's connection is in {@code closingChannels}, the destination is
 * recorded in {@code failedSends} and the send is not queued.
 *
 * @param send the data to send, carrying its destination id
 */
public void send(Send send) {
    String targetId = send.destination();
    // Look up the channel for this destination.
    KafkaChannel target = openOrClosingChannelOrFail(targetId);
    if (closingChannels.containsKey(targetId)) {
        this.failedSends.add(targetId);
        return;
    }
    target.setSend(send);
}
selector真正執行 網絡I/O的地方,poll(),它會調用nioSelector的select()方法,