Kafka架構 - send message(三)*

生產者流程圖

在這裏插入圖片描述

概述:本文主要解析Sender線程從RecordAccumulator拉取消息、發送消息到Kafka Cluster之後的後置處理,比如對請求完成的處理、響應完成的處理、斷開連接以及新加入連接的處理、超時請求的處理,最後記錄相關信息,執行對應的回調,完成或者重試批次的處理。

而Sender線程從RecordAccumulator拉取消息、發送消息到Kafka Cluster的具體過程,會在本系列文章的第四篇、第五篇進行解析。

源碼分析

(Note: 本文基於kafka-clients-2.3.1版本)

Sender

The background thread that handles the sending of produce requests to the Kafka cluster. This thread makes metadata
requests to renew its view of the cluster and then sends produce requests to the appropriate nodes.

  1. Sender#run()

除了執行runOnce方法外,即使Sender線程關閉,仍舊等待未完成的記錄完成。

Sender:

	/**
     * Main loop of the producer I/O thread.
     *
     * <p>Calls {@code runOnce()} for as long as {@code running} is true. Afterwards, unless
     * {@code forceClose} is set, it keeps calling {@code runOnce()} while the accumulator still has
     * undrained batches, the client still has in-flight requests, or transactional requests are
     * pending; it then aborts any transaction that is still ongoing. On a forced close the
     * transaction manager is closed and the incomplete batches are aborted. Finally the network
     * client is closed.
     *
     * <p>Exceptions raised by an iteration are logged instead of being silently dropped, so a
     * misbehaving I/O thread leaves a trace while the loop keeps running.
     */
    public void run() {
        log.debug("Starting Kafka producer I/O thread.");
        /* running is a volatile boolean that tells whether the sender thread should keep going */
        while (running) {
            try {
                runOnce();
            } catch (Exception e) {
                log.error("Uncaught error in kafka producer I/O thread: ", e);
            }
        }
        log.debug("Beginning shutdown of Kafka producer I/O thread, sending remaining records.");

        /* After the sender has been asked to stop, keep sending what is left unless the close is forced. */
        /* forceClose: marks a forced shutdown */
        /* accumulator.hasUndrained(): whether any deque in the accumulator's batches map is non-empty */
        /* client.inFlightRequestCount(): number of requests still waiting for acknowledgement */
        /* hasPendingTransactionalRequests(): whether unfinished transactional requests remain */
        while (!forceClose && ((this.accumulator.hasUndrained() || this.client.inFlightRequestCount() > 0) || hasPendingTransactionalRequests())) {
            try {
                runOnce();
            } catch (Exception e) {
                log.error("Uncaught error in kafka producer I/O thread: ", e);
            }
        }

        /* Abort a transaction that is still open and not already completing, then keep polling until it ends. */
        while (!forceClose && transactionManager != null && transactionManager.hasOngoingTransaction()) {
            if (!transactionManager.isCompleting()) {
                log.info("Aborting incomplete transaction due to shutdown");
                /* Per the article: beginAbort() moves the manager to ABORTING_TRANSACTION, clears the
                 * newly added partitions and enqueues an EndTxn handler in pendingRequests. */
                transactionManager.beginAbort();
            }
            try {
                runOnce();
            } catch (Exception e) {
                log.error("Uncaught error in kafka producer I/O thread: ", e);
            }
        }
        if (forceClose) {
            if (transactionManager != null) {
                /* close the TransactionManager */
                transactionManager.close();
            }
            /* Per the article: stops further appends, removes the incomplete batches, returns buffers
             * to the pool for non-split batches, fires the callbacks and clears the batches map. */
            this.accumulator.abortIncompleteBatches();
        }
        try {
            /* close the NetworkClient (per the article: active -> closing -> closed, then the
             * Selector and MetadataUpdater are closed) */
            this.client.close();
        } catch (Exception e) {
            log.error("Failed to close network client", e);
        }
    }
  1. Sender#runOnce()
    /**
     * Runs a single iteration of the sender loop.
     *
     * <p>When a transaction manager is configured, the transactional / idempotent bookkeeping is
     * handled first; that part may return early (a transactional request was handled, or the manager
     * is in a fatal state / has no producer id). Otherwise the method sends producer data and polls
     * the network client with the timeout returned by {@code sendProducerData}.
     */
    void runOnce() {
        if (transactionManager != null) {
            try {
                /* NOTE(review): per the article, for a non-transactional producer this resets the producer id,
                 * epoch, etc. when previously failed partitions were not fully resolved - confirm against TransactionManager */
                transactionManager.resetProducerIdIfNeeded();
                /* per the article, isTransactional() is decided by whether a transactional id is set */
                if (!transactionManager.isTransactional()) {
                    // this is an idempotent producer, so make sure we have a producer id
                    /* per the article: picks a suitable node, sends an InitProducerIdRequest and, if the
                     * response carries no error, updates the ProducerIdAndEpoch */
                    maybeWaitForProducerId();
                } else if (transactionManager.hasUnresolvedSequences() && !transactionManager.hasFatalError()) {
                    transactionManager.transitionToFatalError(
                        new KafkaException("..."));
                /* true: a transactional request was sent or polled, or a FindCoordinator request was enqueued */
                } else if (maybeSendAndPollTransactionalRequest()) {
                    return;
                }
                // do not continue sending if the transaction manager is in a failed state or if there
                // is no producer id (for the idempotent case).
                if (transactionManager.hasFatalError() || !transactionManager.hasProducerId()) {
                    RuntimeException lastError = transactionManager.lastError();
                    if (lastError != null)
                        /* per the article: if there are incomplete batches, stop further appends,
                         * fire the callbacks and return buffers to the pool for non-split batches */
                        maybeAbortBatches(lastError);
                    client.poll(retryBackoffMs, time.milliseconds());
                    return;
                } else if (transactionManager.hasAbortableError()) {
                    /* per the article: stop further appends, fire the callbacks and
                     * return buffers to the pool for non-split batches */
                    accumulator.abortUndrainedBatches(transactionManager.lastError());
                }
            } catch (AuthenticationException e) {
                /* let the TransactionManager handle the authentication failure */
                transactionManager.authenticationFailed(e);
            }
        }
        long currentTimeMs = time.milliseconds();
        /* send messages to the Kafka cluster (analysed in detail in part five of the series) */
        long pollTimeout = sendProducerData(currentTimeMs);
        client.poll(pollTimeout, currentTimeMs);
    }
  1. NetworkClient#poll(long timeout, long now)
	/**
     * Do actual reads and writes to sockets.
     *
     * <p>If aborted sends are pending they are completed and returned immediately, without polling
     * the selector. Otherwise the selector is polled and the completed sends, completed receives,
     * disconnections, new connections, ApiVersions initiation and timed-out requests are processed
     * in that order, after which the callbacks of all collected responses are invoked.
     *
     * @param timeout upper bound passed to the selector poll (further capped by the metadata
     *                timeout and {@code defaultRequestTimeoutMs})
     * @param now     the current time in milliseconds
     * @return the responses whose completion callbacks were invoked during this call
     */
    @Override
    public List<ClientResponse> poll(long timeout, long now) {
        /* 1. make sure the client state is ACTIVE */
        ensureActive();

        /* 2. handle sends aborted earlier because of unsupported versions or disconnects */
        if (!abortedSends.isEmpty()) {
            // If there are aborted sends because of unsupported version exceptions or disconnects,
            // handle them immediately without waiting for Selector#poll.
            List<ClientResponse> responses = new ArrayList<>();
            /* move the aborted sends into responses and clear the abortedSends list */
            handleAbortedSends(responses);
            /* invoke the completion callback of every response */
            completeResponses(responses);
            return responses;
        }

        /* timeout reported by the metadata updater; one of the upper bounds of the selector poll below */
        long metadataTimeout = metadataUpdater.maybeUpdate(now);
        try {
            /* core network poll (analysed in detail in part four of the series) */
            this.selector.poll(Utils.min(timeout, metadataTimeout, defaultRequestTimeoutMs));
        } catch (IOException e) {
            // Do not swallow silently: an I/O failure here would otherwise be invisible.
            log.error("Unexpected error during I/O", e);
        }

        long updatedNow = this.time.milliseconds();
        List<ClientResponse> responses = new ArrayList<>();
        /* completed sends (requests that expect no response) */
        handleCompletedSends(responses, updatedNow);
        /* completed receives */
        handleCompletedReceives(responses, updatedNow);
        /* disconnected nodes */
        handleDisconnections(responses, updatedNow);
        /* newly established connections */
        handleConnections();
        /* initiate ApiVersions requests */
        handleInitiateApiVersionRequests(updatedNow);
        /* timed-out requests */
        handleTimedOutRequests(responses, updatedNow);
        /* run the callbacks: record metrics, complete or retry the batches */
        completeResponses(responses);
        return responses;
    }
處理發送完成的請求 (不需要響應的請求)

NetworkClient:

/**
 * Completes requests that were fully written and do not expect a response.
 *
 * @param responses list that receives a completed response for each such request
 * @param now       the current time in milliseconds
 */
private void handleCompletedSends(List<ClientResponse> responses, long now) {
    // if no response is expected then when the send is completed, return it
    for (Send completedSend : this.selector.completedSends()) {
        String nodeId = completedSend.destination();
        /* 1. look at the request most recently sent to that node */
        InFlightRequest latest = this.inFlightRequests.lastSent(nodeId);
        if (latest.expectResponse) {
            continue;
        }
        /* 2. the send is complete: take the request off the queue */
        this.inFlightRequests.completeLastSent(nodeId);
        /* 3. hand back a response with no body */
        responses.add(latest.completed(null, now));
    }
}

NetworkClient.InFlightRequests:

/* Returns, without removing it, the head of the node's request queue
 * (described by the article as the most recently sent request). */
public NetworkClient.InFlightRequest lastSent(String node) {
    Deque<NetworkClient.InFlightRequest> queue = requestQueue(node);
    return queue.peekFirst();
}
/* Returns the request queue of the given node; throws IllegalStateException when the node
 * has no queue or the queue is empty. */
private Deque<NetworkClient.InFlightRequest> requestQueue(String node) {
    Deque<NetworkClient.InFlightRequest> queue = requests.get(node);
    if (queue != null && !queue.isEmpty()) {
        return queue;
    }
    throw new IllegalStateException("There are no in-flight requests for node " + node);
}
/* Completes the last send: removes the head of the node's queue, decrements the
 * in-flight counter and returns the removed request. */
public NetworkClient.InFlightRequest completeLastSent(String node) {
    Deque<NetworkClient.InFlightRequest> queue = requestQueue(node);
    NetworkClient.InFlightRequest removed = queue.pollFirst();
    inFlightRequestCount.decrementAndGet();
    return removed;
}
處理響應完成的請求

可能會限制連接一段時間,對於MetadataResponse、ApiVersionsResponse進行額外處理,其他類型的響應添加到ClientResponse列表中

/**
 * Processes every completed receive: parses the response, possibly throttles the connection,
 * and either handles it internally (metadata / ApiVersions responses to internal requests) or
 * appends a completed response to {@code responses}.
 *
 * @param responses list that receives the responses to hand back to callers
 * @param now       the current time in milliseconds
 */
private void handleCompletedReceives(List<ClientResponse> responses, long now) {
    for (NetworkReceive received : this.selector.completedReceives()) {
        String nodeId = received.source();
        /* take the next in-flight request of that node off its queue */
        InFlightRequest request = inFlightRequests.completeNext(nodeId);
        Struct parsed = parseStructMaybeUpdateThrottleTimeMetrics(
                received.payload(), request.header, throttleTimeSensor, now);
        AbstractResponse responseBody = AbstractResponse.parseResponse(
                request.header.apiKey(), parsed, request.header.apiVersion());
        // If the received response includes a throttle delay, throttle the connection.
        maybeThrottle(responseBody, request.header.apiVersion(), request.destination, now);

        if (request.isInternalRequest && responseBody instanceof MetadataResponse) {
            /* response to an internal metadata request */
            metadataUpdater.handleCompletedMetadataResponse(request.header, now, (MetadataResponse) responseBody);
        } else if (request.isInternalRequest && responseBody instanceof ApiVersionsResponse) {
            /* response to an internal ApiVersions request */
            handleApiVersionsResponse(responses, request, now, (ApiVersionsResponse) responseBody);
        } else {
            /* anything else is wrapped in a ClientResponse and returned to the caller */
            responses.add(request.completed(responseBody, now));
        }
    }
}

**如果來自一個節點的響應的throttle時間(throttleTimeMs)大於0,並且客戶端應該throttle,那麼就限制指定節點的連接到某一時刻**
在這裏插入圖片描述

在這裏插入圖片描述

處理斷開連接的節點

更新reconnectBackoffMs,根據不同類型斷開連接的情況打相應的日誌,將構造的ClientResponse添加到ClientResponse列表中
在這裏插入圖片描述

/**
 * Post-processes a disconnection from a node (abridged listing).
 *
 * Marks the node as disconnected, forgets its API versions, reacts to an authentication
 * failure, and clears the node's in-flight requests: non-internal requests get a
 * "disconnected" response, internal metadata requests are reported to the metadata updater.
 *
 * @param responses       list that receives the responses for the cleared requests
 * @param nodeId          id of the node that was disconnected
 * @param now             the current time in milliseconds
 * @param disconnectState channel state describing why the connection ended
 */
private void processDisconnection(List<ClientResponse> responses,
                                  String nodeId,
                                  long now,
                                  ChannelState disconnectState) {
    /* update the node's connection state (per the article this updates its reconnectBackoffMs) */
    connectionStates.disconnected(nodeId, now);
    apiVersions.remove(nodeId);
    nodesNeedingApiVersionsFetch.remove(nodeId);
    switch (disconnectState.state()) {
        case AUTHENTICATION_FAILED:
            AuthenticationException exception = disconnectState.exception();
            connectionStates.authenticationFailed(nodeId, now, exception);
            metadataUpdater.handleFatalException(exception);     
            break;
        case AUTHENTICATE:
            break;
        case NOT_CONNECTED:
            break;
        default:
            break; 
    }
    /* every request still in flight to this node is dropped */
    for (InFlightRequest request : this.inFlightRequests.clearAll(nodeId)) {
        if (!request.isInternalRequest)
            responses.add(request.disconnected(now, disconnectState.exception()));
        else if (request.header.apiKey() == ApiKeys.METADATA)
            metadataUpdater.handleDisconnection(request.destination);
    }
}
處理新加入的連接

主要設置節點的連接狀態
在這裏插入圖片描述

discoverBrokerVersions: True if we should send an ApiVersionRequest when first connecting to a broker

設置節點的連接狀態爲CHECKING_API_VERSIONS
在這裏插入圖片描述

設置節點的連接狀態爲READY
在這裏插入圖片描述
在這裏插入圖片描述

處理初始化ApiVersion請求

在這裏插入圖片描述

在這裏插入圖片描述

Returns true if the channel has handshake and authentication done.
Returns true if authentication is complete
在這裏插入圖片描述

判斷是否可以發送更多的請求給指定節點
在這裏插入圖片描述

/**
 * Builds the request for the version usable with the destination node and sends it.
 *
 * <p>For non-internal requests the node must be able to accept a request, otherwise an
 * {@link IllegalStateException} is thrown. If no usable version exists, the request is not sent;
 * instead a response carrying the {@code UnsupportedVersionException} is queued in
 * {@code abortedSends}, and for an internal metadata request the metadata updater is notified.
 *
 * @param clientRequest     the request to send
 * @param isInternalRequest whether the request was created by the client itself
 * @param now               the current time in milliseconds
 */
private void doSend(ClientRequest clientRequest, boolean isInternalRequest, long now) {
    /* the client must be in the ACTIVE state */
    ensureActive();
    String nodeId = clientRequest.destination();
    /* external requests require a connected, ready node that can take more requests */
    if (!isInternalRequest && !canSendRequest(nodeId, now)) {
        throw new IllegalStateException("......");
    }
    AbstractRequest.Builder<?> builder = clientRequest.requestBuilder();
    try {
        NodeApiVersions versionInfo = apiVersions.get(nodeId);
        /* without version info use the builder's latest allowed version; otherwise
         * get the latest version supported by the broker within the allowed range */
        short version = (versionInfo == null)
                ? builder.latestAllowedVersion()
                : versionInfo.latestUsableVersion(clientRequest.apiKey(), builder.oldestAllowedVersion(),
                        builder.latestAllowedVersion());
        doSend(clientRequest, isInternalRequest, now, builder.build(version));
    } catch (UnsupportedVersionException unsupportedVersionException) {
        ClientResponse aborted = new ClientResponse(clientRequest.makeHeader(builder.latestAllowedVersion()),
                clientRequest.callback(), clientRequest.destination(), now, now,
                false, unsupportedVersionException, null, null);
        abortedSends.add(aborted);
        boolean internalMetadataRequest = isInternalRequest && clientRequest.apiKey() == ApiKeys.METADATA;
        if (internalMetadataRequest) {
            metadataUpdater.handleFatalException(unsupportedVersionException);
        }
    }
}
/**
 * Registers the already-built request as in-flight and hands its send to the selector.
 *
 * @param clientRequest     the originating client request
 * @param isInternalRequest whether the request was created by the client itself
 * @param now               the current time in milliseconds
 * @param request           the built request to send
 */
private void doSend(ClientRequest clientRequest, boolean isInternalRequest, long now, AbstractRequest request) {
    String nodeId = clientRequest.destination();
    /* header for the version of the built request */
    RequestHeader requestHeader = clientRequest.makeHeader(request.version());
    /* the network send for that node */
    Send outgoing = request.toSend(nodeId, requestHeader);
    InFlightRequest tracked =
            new InFlightRequest(clientRequest, requestHeader, isInternalRequest, request, outgoing, now);
    this.inFlightRequests.add(tracked);
    selector.send(outgoing);
}
處理超時請求

NetworkClient:

    /**
     * Closes the connection to every node that has a timed-out in-flight request, processes the
     * resulting disconnections and, if any node was affected, requests a metadata update.
     *
     * @param responses list that receives the responses for the dropped requests
     * @param now       the current time in milliseconds
     */
    private void handleTimedOutRequests(List<ClientResponse> responses, long now) {
        /* 1. ids of the nodes that have timed-out requests */
        List<String> timedOutNodes = this.inFlightRequests.nodesWithTimedOutRequests(now);
        if (timedOutNodes.isEmpty()) {
            return;
        }
        for (String timedOutNode : timedOutNodes) {
            /* 2. close connection to the node (Kafka's own Selector) */
            this.selector.close(timedOutNode);
            /* 3. handle the node as disconnected */
            processDisconnection(responses, timedOutNode, now, ChannelState.LOCAL_CLOSE);
        }
        // we disconnected, so we should probably refresh our metadata
        /* 4. request a metadata update */
        metadataUpdater.requestUpdate();
    }
	/**
     * Returns a list of nodes with pending in-flight request, that need to be timed out
     *
     * @param now the current time in milliseconds
     * @return ids of all nodes whose queue contains at least one expired request
     */
    public List<String> nodesWithTimedOutRequests(long now) {
        List<String> expiredNodes = new ArrayList<>();
        for (Map.Entry<String, Deque<NetworkClient.InFlightRequest>> entry : requests.entrySet()) {
            if (hasExpiredRequest(now, entry.getValue())) {
                expiredNodes.add(entry.getKey());
            }
        }
        return expiredNodes;
    }

	/**
     * Tells whether the given queue contains a request that has been in flight for longer than
     * its own {@code requestTimeoutMs}.
     *
     * <p>Returns a primitive {@code boolean}: the result is a plain predicate, so boxing it into
     * {@code Boolean} only added an allocation and a needless nullable type.
     *
     * @param now   the current time in milliseconds
     * @param deque the in-flight requests of one node
     * @return {@code true} if at least one request has expired
     */
    private boolean hasExpiredRequest(long now, Deque<NetworkClient.InFlightRequest> deque) {
        for (NetworkClient.InFlightRequest request : deque) {
            // clamp at 0 so a send time later than "now" never yields a negative elapsed time
            long timeSinceSend = Math.max(0, now - request.sendTimeMs);
            /* a request in flight for longer than its timeout is considered expired */
            if (timeSinceSend > request.requestTimeoutMs)
                return true;
        }
        return false;
    }

NetworkClient:

    /**
     * Post-processes a disconnection from a node.
     *
     * @param responses       list that receives the responses for the cleared requests
     * @param nodeId          id of the node that was disconnected
     * @param now             the current time in milliseconds
     * @param disconnectState channel state describing why the connection ended
     */
    private void processDisconnection(List<ClientResponse> responses,
                                      String nodeId,
                                      long now,
                                      ChannelState disconnectState) {
        /* 3.1 mark the node disconnected and update its reconnect backoff */
        connectionStates.disconnected(nodeId, now);
        /* forget the node's API versions and any pending ApiVersions fetch */
        apiVersions.remove(nodeId);
        nodesNeedingApiVersionsFetch.remove(nodeId);

        /* only an authentication failure needs extra handling; all other states are no-ops here */
        switch (disconnectState.state()) {
            case AUTHENTICATION_FAILED:
                AuthenticationException authFailure = disconnectState.exception();
                /* 3.2 record the authentication failure on the connection state */
                connectionStates.authenticationFailed(nodeId, now, authFailure);
                /* 3.3 let the metadata updater handle the fatal exception */
                metadataUpdater.handleFatalException(authFailure);
                break;
            case AUTHENTICATE:
            case NOT_CONNECTED:
            default:
                break;
        }

        for (InFlightRequest cleared : this.inFlightRequests.clearAll(nodeId)) {
            log.trace("......");
            if (cleared.isInternalRequest) {
                if (cleared.header.apiKey() == ApiKeys.METADATA) {
                    /* 3.5 tell the metadata updater its request was lost */
                    metadataUpdater.handleDisconnection(cleared.destination);
                }
            } else {
                /* 3.4 hand back a "disconnected" response for the request */
                responses.add(cleared.disconnected(now, disconnectState.exception()));
            }
        }
    }

3.1

	/**
     * Marks the connection as DISCONNECTED, records {@code now} as the last connect attempt
     * and updates the reconnect backoff.
     *
     * @param id  the connection (node) id
     * @param now the current time in milliseconds
     */
    public void disconnected(String id, long now) {
        NodeConnectionState connection = nodeState(id);
        connection.state = ConnectionState.DISCONNECTED;
        connection.lastConnectAttemptMs = now;
        updateReconnectBackoff(connection);
    }

	/**
     * Looks up the state tracked for a connection.
     *
     * @param id the connection (node) id
     * @return the tracked state, never {@code null}
     * @throws IllegalStateException if no state is tracked for {@code id}
     */
    private NodeConnectionState nodeState(String id) {
        NodeConnectionState found = this.nodeState.get(id);
        if (found != null) {
            return found;
        }
        throw new IllegalStateException("No entry found for connection " + id);
    }

	/**
     * Update the node reconnect backoff exponentially.
     * The delay is reconnect.backoff.ms * 2**(failures - 1) * (+/- 20% random jitter)
     * Up to a (pre-jitter) maximum of reconnect.backoff.max.ms
     *
     * <p>Nothing is changed unless the maximum backoff is greater than the initial backoff.
     */
    private void updateReconnectBackoff(NodeConnectionState nodeState) {
        if (this.reconnectBackoffMaxMs <= this.reconnectBackoffInitMs) {
            return;
        }
        nodeState.failedAttempts += 1;
        double exponent = Math.min(nodeState.failedAttempts - 1, this.reconnectBackoffMaxExp);
        double multiplier = Math.pow(RECONNECT_BACKOFF_EXP_BASE, exponent);
        long baseBackoffMs = (long) (this.reconnectBackoffInitMs * multiplier);
        // Actual backoff is randomized to avoid connection storms.
        double jitter = ThreadLocalRandom.current().nextDouble(0.8, 1.2);
        nodeState.reconnectBackoffMs = (long) (jitter * baseBackoffMs);
    }

3.2

	/**
     * Records an authentication failure on the connection: stores the exception, marks the state
     * AUTHENTICATION_FAILED, records {@code now} as the last connect attempt and updates the
     * reconnect backoff.
     *
     * @param id        the connection (node) id
     * @param now       the current time in milliseconds
     * @param exception the authentication failure
     */
    public void authenticationFailed(String id, long now, AuthenticationException exception) {
        NodeConnectionState connection = nodeState(id);
        connection.authenticationException = exception;
        connection.state = ConnectionState.AUTHENTICATION_FAILED;
        connection.lastConnectAttemptMs = now;
        updateReconnectBackoff(connection);
    }

3.3

		/**
         * Reports a fatal exception: if a metadata update was requested it is marked as failed
         * with this exception; the in-progress request version is cleared in any case.
         */
        @Override
        public void handleFatalException(KafkaException fatalException) {
            boolean updatePending = metadata.updateRequested();
            if (updatePending) {
                metadata.failedUpdate(time.milliseconds(), fatalException);
            }
            inProgressRequestVersion = null;
        }

		/** Returns the current value of the {@code needUpdate} flag. */
        public synchronized boolean updateRequested() {
            final boolean requested = this.needUpdate;
            return requested;
        }

		/**
         * Records a failed metadata update: remembers the exception and sets the last refresh
         * time to {@code now}.
         */
        public synchronized void failedUpdate(long now, KafkaException fatalException) {
            this.fatalException = fatalException;
            this.lastRefreshMs = now;
        }

3.4

		/**
         * Builds the response for a request whose connection was lost.
         *
         * @param timeMs                  passed as the fifth constructor argument (a timestamp in ms)
         * @param authenticationException the authentication failure, if any, to attach to the response
         * @return a response created with the "disconnected" flag set to {@code true}
         */
        public ClientResponse disconnected(long timeMs, AuthenticationException authenticationException) {
            ClientResponse lost = new ClientResponse(header, callback, destination, createdTimeMs, timeMs,
                    true, null, authenticationException, null);
            return lost;
        }

3.5

		/**
         * Reacts to a lost connection of a metadata request: warns when the cluster is still the
         * bootstrap configuration and the node is known, then clears the in-progress request version.
         *
         * @param destination id of the disconnected node, parsed as an integer when the cluster is
         *                    bootstrap-configured
         */
        @Override
        public void handleDisconnection(String destination) {
            Cluster cluster = metadata.fetch();
            if (cluster.isBootstrapConfigured()) {
                Node bootstrapNode = cluster.nodeById(Integer.parseInt(destination));
                if (bootstrapNode != null) {
                    log.warn("Bootstrap broker {} disconnected", bootstrapNode);
                }
            }
            inProgressRequestVersion = null;
        }

NetworkClient:

		/** Forwards the update request to the underlying metadata object; its return value is ignored. */
        @Override
        public void requestUpdate() {
            metadata.requestUpdate();
        }

Metadata:

	/**
     * Request an update of the current cluster metadata info, return the current updateVersion before the update
     *
     * @return the value of {@code updateVersion} at the time of the call
     */
    public synchronized int requestUpdate() {
        final int versionBeforeUpdate = this.updateVersion;
        this.needUpdate = true;
        return versionBeforeUpdate;
    }
completeResponses(…) - 完成最後的響應處理

主要步驟:記錄信息,完成或者重試給定的批次

NetworkClient:

	/**
     * Invokes the completion callback of every response. An exception thrown by one callback is
     * logged and does not prevent the remaining callbacks from running.
     *
     * @param responses the responses to complete
     */
    private void completeResponses(List<ClientResponse> responses) {
        for (ClientResponse completed : responses) {
            try {
                completed.onComplete();
            } catch (Exception callbackFailure) {
                log.error("Uncaught error in request completion:", callbackFailure);
            }
        }
    }

ClientResponse:

	/** Runs the completion callback with this response, if a callback was supplied. */
    public void onComplete() {
        if (callback == null) {
            return;
        }
        callback.onComplete(this);
    }

Sender:

	/**
     * Handle a produce response - 記錄信息,完成或者重試給定的批次
     */
    private void handleProduceResponse(ClientResponse response, Map<TopicPartition, ProducerBatch> batches, long now) {
        RequestHeader requestHeader = response.requestHeader();
        long receivedTimeMs = response.receivedTimeMs();
        int correlationId = requestHeader.correlationId();
        if (response.wasDisconnected()) {
            for (ProducerBatch batch : batches.values())
                completeBatch(batch, new ProduceResponse.PartitionResponse(Errors.NETWORK_EXCEPTION), correlationId, now, 0L);
        } else if (response.versionMismatch() != null) {
            for (ProducerBatch batch : batches.values())
                completeBatch(batch, new ProduceResponse.PartitionResponse(Errors.UNSUPPORTED_VERSION), correlationId, now, 0L);
        } else { 
            if (response.hasResponse()) {
                ProduceResponse produceResponse = (ProduceResponse) response.responseBody();
                for (Map.Entry<TopicPartition, ProduceResponse.PartitionResponse> entry : produceResponse.responses().entrySet()) {
                    TopicPartition tp = entry.getKey();
                    ProduceResponse.PartitionResponse partResp = entry.getValue();
                    ProducerBatch batch = batches.get(tp);
                    completeBatch(batch, partResp, correlationId, now, receivedTimeMs + produceResponse.throttleTimeMs());
                }
                /* 1. 記錄一些信息 */
                this.sensors.recordLatency(response.destination(), response.requestLatencyMs());
            } else {
                // this is the acks = 0 case, just complete all requests
                for (ProducerBatch batch : batches.values()) {
                	/* 2. 完成或者重試給定的批次 */
                    completeBatch(batch, new ProduceResponse.PartitionResponse(Errors.NONE), correlationId, now, 0L);
                }
            }
        }
  1. 記錄相關信息
		/**
         * Records a request latency on the overall request-time sensor and, when {@code node} is
         * non-empty and a sensor named {@code node-<node>.latency} exists, on that sensor as well.
         *
         * @param node    the node id; an empty string skips the per-node sensor
         * @param latency the latency value to record
         */
        public void recordLatency(String node, long latency) {
            long now = time.milliseconds();
            this.requestTimeSensor.record(latency, now);
            if (node.isEmpty()) {
                return;
            }
            Sensor perNodeSensor = this.metrics.getSensor("node-" + node + ".latency");
            if (perNodeSensor != null) {
                perNodeSensor.record(latency, now);
            }
        }
	/**
     * Records a value on this sensor and then on each of its parents.
     *
     * <p>Does nothing when {@code shouldRecord()} is false. Otherwise every stat is updated under
     * the sensor and metric locks, quotas are checked if requested, and the same value is
     * propagated to the parent sensors.
     *
     * @param value       the value to record
     * @param timeMs      the time of the recording in milliseconds
     * @param checkQuotas whether quotas should be checked after recording
     */
    public void record(double value, long timeMs, boolean checkQuotas) {
        if (!shouldRecord()) {
            return;
        }
        this.lastRecordTime = timeMs;
        synchronized (this) {
            synchronized (metricLock()) {
                for (Stat tracked : this.stats) {
                    tracked.record(config, value, timeMs);
                }
            }
            if (checkQuotas) {
                checkQuotas(timeMs);
            }
        }
        for (Sensor ancestor : parents) {
            ancestor.record(value, timeMs, checkQuotas);
        }
    }
  1. 完成或者重試給定的批次
	/**
     * Complete or retry the given batch of records.
     *
     * <p>Branches on the error of the partition response:
     * a too-large multi-record batch is split and re-enqueued; any other error is either retried
     * (re-enqueued), treated as success for a duplicate sequence number, or failed with a mapped
     * exception; no error completes the batch. An error whose exception is an
     * {@code InvalidMetadataException} additionally requests a metadata update. When message order
     * is guaranteed, the partition is unmuted at the end.
     *
     * @param batch               the record batch
     * @param response            the partition response for the batch
     * @param correlationId       the correlation id of the request (not used in the lines shown here)
     * @param now                 the current time in milliseconds
     * @param throttleUntilTimeMs passed to {@code unmutePartition} together with the partition
     */
    private void completeBatch(ProducerBatch batch, ProduceResponse.PartitionResponse response, long correlationId,
                               long now, long throttleUntilTimeMs) {
        Errors error = response.error;

        if (error == Errors.MESSAGE_TOO_LARGE && batch.recordCount > 1 && !batch.isDone() &&
                (batch.magic() >= RecordBatch.MAGIC_VALUE_V2 || batch.isCompressed())) {
          
            if (transactionManager != null)
                /* remove the batch from the transaction manager's in-flight batches */
                transactionManager.removeInFlightBatch(batch);
            this.accumulator.splitAndReenqueue(batch);
            /* drop the original batch and release the space it occupies */
            maybeRemoveAndDeallocateBatch(batch);
            this.sensors.recordBatchSplit();
        } else if (error != Errors.NONE) {
            /* decide whether the batch can be retried
             * NOTE(review): this is a three-argument canRetry on this class, not the two-argument
             * TransactionManager.canRetry listed later in the article */
            if (canRetry(batch, response, now)) {       
                if (transactionManager == null) {
                    /* put the batch back on the queue */
                    reenqueueBatch(batch, now);
                /* retry only if the batch still carries the current producer id and epoch */
                } else if (transactionManager.hasProducerIdAndEpoch(batch.producerId(), batch.producerEpoch())) {         
                    /* put the batch back on the queue */
                    reenqueueBatch(batch, now);
                } else {
                    /* fail the batch */
                    failBatch(batch, response, new OutOfOrderSequenceException("......"), false);
                }
            /* duplicate sequence number */
            } else if (error == Errors.DUPLICATE_SEQUENCE_NUMBER) {
                /* treat the batch as completed */
                completeBatch(batch, response);
            } else {
                final RuntimeException exception;
                if (error == Errors.TOPIC_AUTHORIZATION_FAILED)
                    exception = new TopicAuthorizationException(batch.topicPartition.topic());
                else if (error == Errors.CLUSTER_AUTHORIZATION_FAILED)
                    exception = new ClusterAuthorizationException("The producer is not authorized to do idempotent sends");
                else
                    exception = error.exception();
                /* fail the batch; sequence numbers are adjusted only while attempts remain below retries */
                failBatch(batch, response, exception, batch.attempts() < this.retries);
            }
            if (error.exception() instanceof InvalidMetadataException) {       
                metadata.requestUpdate();
            }
        } else {
            /* no error: complete the batch */
            completeBatch(batch, response);
        }

        /* indicate whether the producer should guarantee the message order on the broker or not */
        if (guaranteeMessageOrder)
            /* per the article, unmutePartition stores the partition in a HashMap<TopicPartition, Long> - TODO confirm */
            this.accumulator.unmutePartition(batch.topicPartition, throttleUntilTimeMs);
    }

將批次重新加入到隊列中

/**
 * Puts a batch back into the accumulator for another attempt, removes it from the in-flight
 * batches and records the retry for its topic.
 *
 * @param batch         the batch to retry
 * @param currentTimeMs the current time in milliseconds
 */
private void reenqueueBatch(ProducerBatch batch, long currentTimeMs) {
    /* re-enqueue the batch */
    this.accumulator.reenqueue(batch, currentTimeMs);
    /* it is no longer in flight */
    maybeRemoveFromInflightBatches(batch);
    String topic = batch.topicPartition.topic();
    this.sensors.recordRetries(topic, batch.recordCount);
}

/**
 * Re-enqueues a batch on the deque of its partition.
 *
 * <p>With a transaction manager the batch is inserted in sequence order; otherwise it is added
 * at the front of the deque. The insertion happens while holding the deque's monitor.
 *
 * @param batch the batch to re-enqueue
 * @param now   the current time in milliseconds
 */
public void reenqueue(ProducerBatch batch, long now) {
    batch.reenqueued(now);
    Deque<ProducerBatch> partitionQueue = getOrCreateDeque(batch.topicPartition);
    synchronized (partitionQueue) {
        if (transactionManager == null) {
            partitionQueue.addFirst(batch);
        } else {
            /* keep the deque ordered by sequence number */
            insertInSequenceOrder(partitionQueue, batch);
        }
    }
}

/**
 * Marks the batch as re-enqueued: increments the attempt counter, moves both
 * {@code lastAttemptMs} and {@code lastAppendTime} to the later of the previous
 * {@code lastAppendTime} and {@code now}, and sets the retry flag.
 *
 * @param now the current time in milliseconds
 */
void reenqueued(long now) {
    attempts.getAndIncrement();
    long latest = Math.max(lastAppendTime, now);
    lastAttemptMs = latest;
    lastAppendTime = latest;
    retry = true;
}

failBatch(…) - 批次失敗處理

/**
 * Fails a batch using the base offset and log append time carried by the partition response.
 *
 * @param batch                 the batch to fail
 * @param response              the partition response supplying base offset and log append time
 * @param exception             the failure to report
 * @param adjustSequenceNumbers forwarded to the five-argument overload
 */
private void failBatch(ProducerBatch batch,
                       ProduceResponse.PartitionResponse response,
                       RuntimeException exception,
                       boolean adjustSequenceNumbers) {
    long baseOffset = response.baseOffset;
    long logAppendTime = response.logAppendTime;
    failBatch(batch, baseOffset, logAppendTime, exception, adjustSequenceNumbers);
}

/**
 * Fails a batch: notifies the transaction manager (if any), records the error metric for the
 * batch's topic and, if {@code batch.done(...)} reports that the batch was finalised by this
 * call, removes and deallocates it.
 *
 * @param batch                 the batch to fail
 * @param baseOffset            base offset passed to {@code batch.done}
 * @param logAppendTime         log append time passed to {@code batch.done}
 * @param exception             the failure to report
 * @param adjustSequenceNumbers forwarded to the transaction manager
 */
private void failBatch(ProducerBatch batch,
                       long baseOffset,
                       long logAppendTime,
                       RuntimeException exception,
                       boolean adjustSequenceNumbers) {
    if (transactionManager != null)
        transactionManager.handleFailedBatch(batch, exception, adjustSequenceNumbers);

    String topic = batch.topicPartition.topic();
    this.sensors.recordErrors(topic, batch.recordCount);

    /* per the article, done(...) atomically sets the final state, fills in the produce result
     * and fires the callbacks; it returns true when that succeeded */
    boolean finalisedNow = batch.done(baseOffset, logAppendTime, exception);
    if (finalisedNow) {
        /* drop the batch and release the space it occupies */
        maybeRemoveAndDeallocateBatch(batch);
    }
}
/* TransactionManager */
/**
 * Updates the transaction manager after a batch failed.
 *
 * <p>The error state is updated first. Nothing else happens if the batch does not carry the
 * current producer id and epoch. For a non-transactional producer hit by an
 * {@code OutOfOrderSequenceException} the producer id is reset; otherwise the batch is removed
 * from the in-flight batches and, if requested, the sequence numbers are adjusted.
 */
public synchronized void handleFailedBatch(ProducerBatch batch, RuntimeException exception, boolean adjustSequenceNumbers) {
    /* propagate the failure into the manager's error state */
    maybeTransitionToErrorState(exception);

    boolean sameProducer = hasProducerIdAndEpoch(batch.producerId(), batch.producerEpoch());
    if (!sameProducer)
        return;

    boolean idempotentOutOfOrder = exception instanceof OutOfOrderSequenceException && !isTransactional();
    if (idempotentOutOfOrder) {
        /* reset the ProducerIdAndEpoch */
        resetProducerId();
        return;
    }

    /* the batch is no longer in flight */
    removeInFlightBatch(batch);
    if (adjustSequenceNumbers) {
        /* adjust the sequence numbers */
        adjustSequencesDueToFailedBatch(batch);
    }
}

判斷是否能重試

/**
 * Decides whether a failed batch may be retried.
 *
 * Returns false when the batch does not carry the current producer id and epoch. Otherwise it
 * returns true for an OUT_OF_ORDER_SEQUENCE_NUMBER error under the conditions below, and for an
 * UNKNOWN_PRODUCER_ID error when the log start offset is -1, when the batch's sequence has been
 * reset, or when the last acked offset is below the log start offset (in which case the
 * partition's sequences are restarted first). Every other case returns false.
 *
 * @param response the partition response carrying the error and log start offset
 * @param batch    the batch that failed
 * @return true if the batch should be retried
 */
synchronized boolean canRetry(ProduceResponse.PartitionResponse response, ProducerBatch batch) {
    /* a batch from a different producer id / epoch is never retried */
    if (!hasProducerIdAndEpoch(batch.producerId(), batch.producerEpoch()))
        return false;
    Errors error = response.error;
    /* retry an out-of-order error only when the partition has no unresolved sequence and the batch
     * either had its sequence reset or is not the next expected sequence
     * (per the article: "next" means sequence - lastAckedSequence == 1) */
    if (error == Errors.OUT_OF_ORDER_SEQUENCE_NUMBER && !hasUnresolvedSequence(batch.topicPartition) &&
            (batch.sequenceHasBeenReset() || !isNextSequence(batch.topicPartition, batch.baseSequence())))
        return true;
    if (error == Errors.UNKNOWN_PRODUCER_ID) {
        if (response.logStartOffset == -1)
            return true;
        /* the batch's sequence was already reset */
        if (batch.sequenceHasBeenReset()) {
            return true;
        } else if (lastAckedOffset(batch.topicPartition).orElse(NO_LAST_ACKED_SEQUENCE_NUMBER) < response.logStartOffset) {
            /* side effect: restart the partition's sequence numbers before retrying */
            startSequencesAtBeginning(batch.topicPartition);
            return true;
        }
    }
    return false;
}

/**
 * Restarts the sequence numbers of a partition at 0.
 *
 * <p>Each in-flight batch visited by {@code resetSequenceNumbers} keeps its producer id and
 * epoch but gets a new base sequence: the running total of the record counts of the batches
 * visited before it. The partition's next sequence is then set to the final total and its last
 * acked sequence is reset to {@code NO_LAST_ACKED_SEQUENCE_NUMBER}.
 */
private void startSequencesAtBeginning(TopicPartition topicPartition) {
    final AtomicInteger nextSequence = new AtomicInteger(0);
    topicPartitionBookkeeper.getPartition(topicPartition).resetSequenceNumbers(inFlightBatch -> {
        ProducerIdAndEpoch sameIdentity =
                new ProducerIdAndEpoch(inFlightBatch.producerId(), inFlightBatch.producerEpoch());
        inFlightBatch.resetProducerState(sameIdentity, nextSequence.get(), inFlightBatch.isTransactional());
        nextSequence.getAndAdd(inFlightBatch.recordCount);
    });
    setNextSequence(topicPartition, nextSequence.get());
    topicPartitionBookkeeper.getPartition(topicPartition).lastAckedSequence = NO_LAST_ACKED_SEQUENCE_NUMBER;
}

completeBatch(…) - 完成批次處理

	/**
     * Completes a batch successfully: notifies the transaction manager (if any) and, if
     * {@code batch.done(...)} reports that the batch was finalised by this call, removes and
     * deallocates it.
     *
     * @param batch    the batch to complete
     * @param response the partition response supplying base offset and log append time
     */
    private void completeBatch(ProducerBatch batch, ProduceResponse.PartitionResponse response) {
        if (transactionManager != null) {
            /* per the article: returns early (with a debug log) if the producer id/epoch differ,
             * otherwise updates lastAckedSequence and lastAckedOffset and removes the in-flight batch */
            transactionManager.handleCompletedBatch(batch, response);
        }

        /* per the article, done(...) atomically sets the final state, fills in the produce result
         * and fires the callbacks; it returns true when that succeeded */
        boolean finalisedNow = batch.done(response.baseOffset, response.logAppendTime, null);
        if (finalisedNow) {
            /* drop the batch and release the space it occupies */
            maybeRemoveAndDeallocateBatch(batch);
        }
    }
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章