Kafka Architecture - Send Message (Part 3)

Producer flow diagram

(figure: producer send flow)

Overview: this article mainly analyzes the post-processing that takes place after the Sender thread has drained messages from the RecordAccumulator and sent them to the Kafka Cluster: handling completed sends, completed responses, disconnected and newly established connections, and timed-out requests, and finally recording metrics, invoking the corresponding callbacks, and completing or retrying batches.

The earlier stages, in which the Sender thread drains messages from the RecordAccumulator and sends them to the Kafka Cluster, will be analyzed in the fourth and fifth articles of this series.

Source code analysis

(Note: this article is based on kafka-clients 2.3.1.)

Sender

The background thread that handles the sending of produce requests to the Kafka cluster. This thread makes metadata
requests to renew its view of the cluster and then sends produce requests to the appropriate nodes.

  1. Sender#run()

Besides repeatedly calling runOnce, the run method also makes the Sender thread, once it has been asked to shut down, keep waiting for outstanding records to complete.

Sender:

	public void run() {
        log.debug("Starting Kafka producer I/O thread.");
        /* running is a volatile boolean that marks whether the Sender thread should keep running */
        while (running) {
            try {
                runOnce();
            } catch (Exception e) {
                
            }
        }
        log.debug("Beginning shutdown of Kafka producer I/O thread, sending remaining records.");
        /* even after the Sender thread has been asked to close, keep sending the remaining records */

        /* forceClose: marks whether this is a forced close */
        /* accumulator.hasUndrained(): checks whether any Deque in the batches field (a CopyOnWriteMap) is non-empty */
        /* NetworkClient.inFlightRequestCount(): checks inFlightRequestCount.get(), i.e. whether there are still requests awaiting acknowledgement */
        /* hasPendingTransactionalRequests(): checks whether pendingRequests (a PriorityQueue) is non-empty, i.e. there are still unfinished transactional requests, and the transaction is ongoing, committing, aborting, or has an abortable error */
        while (!forceClose && ((this.accumulator.hasUndrained() || this.client.inFlightRequestCount() > 0) || hasPendingTransactionalRequests())) {
            try {
                runOnce();
            } catch (Exception e) {
                
            }
        }

        while (!forceClose && transactionManager != null && transactionManager.hasOngoingTransaction()) {
            if (!transactionManager.isCompleting()) {
                log.info("Aborting incomplete transaction due to shutdown");
                /* first handle the pendingResult field: if the CountDownLatch count is 0, set pendingResult to null */
                /* switch the state to ABORTING_TRANSACTION and clear the Set newPartitionsInTransaction */
                /* build an EndTxnHandler and add it to the PriorityQueue<TxnRequestHandler> pendingRequests */
                transactionManager.beginAbort();
            }
            try {
                runOnce();
            } catch (Exception e) {
                
            }
        }
        if (forceClose) {
            if (transactionManager != null) {
                /* close the TransactionManager */
                transactionManager.close();
            }
            /* abort each MemoryRecordsBuilder, i.e. stop appending records */
            /* remove the incomplete batches from the Set in IncompleteBatches#incomplete */
            /* if a batch is not a split batch (isSplitBatch is false), return its buffer to the pool */
            /* invoke the callbacks */
            /* clear the ConcurrentHashMap<TopicPartition, Deque<ProducerBatch>> batches */
            this.accumulator.abortIncompleteBatches();
        }
        try {
        	/* close the NetworkClient */
        	/* i.e. move its state from active -> closing -> closed, then close the Selector and the MetadataUpdater */
            this.client.close();
        } catch (Exception e) {
            
        }      
    }
  2. Sender#runOnce()
    void runOnce() {
        if (transactionManager != null) {
            try {
            	/* for a non-transactional producer, if partitions whose batches previously expired have not been fully resolved, reset the producer id, epoch and related state */
                transactionManager.resetProducerIdIfNeeded();
                /* whether the producer is transactional is decided by whether the transactional id is set */
                if (!transactionManager.isTransactional()) {
                    // this is an idempotent producer, so make sure we have a producer id
                    /* pick a suitable node and send an InitProducerIdRequest; if the response has no error, update ProducerIdAndEpoch */
                    maybeWaitForProducerId();
                } else if (transactionManager.hasUnresolvedSequences() && !transactionManager.hasFatalError()) {
                    transactionManager.transitionToFatalError(
                        new KafkaException("..."));
                /* true: a transactional request was sent or polled, or a FindCoordinator request was enqueued */
                } else if (maybeSendAndPollTransactionalRequest()) {
                    return;
                }
                // do not continue sending if the transaction manager is in a failed state or if there
                // is no producer id (for the idempotent case).
                if (transactionManager.hasFatalError() || !transactionManager.hasProducerId()) {
                    RuntimeException lastError = transactionManager.lastError();
                    if (lastError != null)
                    	/* if IncompleteBatches is non-empty, then: */
                    	/* abort each MemoryRecordsBuilder, i.e. stop appending records */
                    	/* invoke the callbacks */
            			/* if a batch is not a split batch (isSplitBatch is false), return its buffer to the pool */
                        maybeAbortBatches(lastError);
                    client.poll(retryBackoffMs, time.milliseconds());
                    return;
                } else if (transactionManager.hasAbortableError()) {
                	/* abort each MemoryRecordsBuilder, i.e. stop appending records */
                    /* invoke the callbacks */
            		/* if a batch is not a split batch (isSplitBatch is false), return its buffer to the pool */
                    accumulator.abortUndrainedBatches(transactionManager.lastError());
                }
            } catch (AuthenticationException e) {
                /* let the TransactionManager handle the authentication failure */
                transactionManager.authenticationFailed(e);
            }
        }
        long currentTimeMs = time.milliseconds();
        /* send messages to the Kafka Cluster; analyzed in detail in the fifth article */
        long pollTimeout = sendProducerData(currentTimeMs);
        client.poll(pollTimeout, currentTimeMs);
    }
  3. NetworkClient#poll(long timeout, long now)
	/**
     * Do actual reads and writes to sockets. 
     */
    @Override
    public List<ClientResponse> poll(long timeout, long now) {
    	/* 1. make sure the client is in the active state */
    	/* i.e. the AtomicReference<State> state field holds State.ACTIVE */
        ensureActive();

		/* 2. handle sends that were previously aborted because of an unsupported version exception or a disconnect */
		/* abortedSends is a LinkedList<ClientResponse> */
        if (!abortedSends.isEmpty()) {
            // If there are aborted sends because of unsupported version exceptions or disconnects,
            // handle them immediately without waiting for Selector#poll.
            List<ClientResponse> responses = new ArrayList<>();
            /* move the aborted sends (the abortedSends list) into responses, then clear abortedSends */
            handleAbortedSends(responses);
            /* iterate over responses and call onCompletion() on each RequestCompletionHandler */
            completeResponses(responses);
            return responses;
        }

		/* how long until the cluster metadata next needs to be updated */
        long metadataTimeout = metadataUpdater.maybeUpdate(now);
        try {
        	/* the core polling method that performs the actual network I/O; analyzed in detail in the fourth article */
            this.selector.poll(Utils.min(timeout, metadataTimeout, defaultRequestTimeoutMs));
        } catch (IOException e) {
            
        }
        long updatedNow = this.time.milliseconds();
        List<ClientResponse> responses = new ArrayList<>();
        /* handle completed sends (requests that expect no response) */
        handleCompletedSends(responses, updatedNow);
        /* handle completed receives */
        handleCompletedReceives(responses, updatedNow);
        /* handle disconnected nodes */
        handleDisconnections(responses, updatedNow);
        /* handle newly established connections */
        handleConnections();
        /* handle the initiation of ApiVersions requests */
        handleInitiateApiVersionRequests(updatedNow);
        /* handle timed-out requests */
        handleTimedOutRequests(responses, updatedNow);
        /* invoke the callbacks: record metrics, then complete or retry the given batches */
        completeResponses(responses);
        return responses;
    }
Handling completed sends (requests that expect no response)

NetworkClient:

private void handleCompletedSends(List<ClientResponse> responses, long now) {
    // if no response is expected then when the send is completed, return it
    /* get the List<Send> completedSends field and iterate over it */
    for (Send send : this.selector.completedSends()) {
    	/* 1. get the most recently sent request for the given node */
        InFlightRequest request = this.inFlightRequests.lastSent(send.destination());
        if (!request.expectResponse) {
        	/* 2. the send is complete, remove the request from the queue */
            this.inFlightRequests.completeLastSent(send.destination());
            /* 3. add it to the ClientResponse list */
            responses.add(request.completed(null, now));
        }
    }
}

NetworkClient.InFlightRequests:

/* get the most recently sent request for the given node */
public NetworkClient.InFlightRequest lastSent(String node) {
    return requestQueue(node).peekFirst();
}
/* get the request queue for the given node */
private Deque<NetworkClient.InFlightRequest> requestQueue(String node) {
    Deque<NetworkClient.InFlightRequest> reqs = requests.get(node);
    if (reqs == null || reqs.isEmpty())
        throw new IllegalStateException("There are no in-flight requests for node " + node);
    return reqs;
}
/* complete the most recent send */
public NetworkClient.InFlightRequest completeLastSent(String node) {
	/* remove it from the head of the request queue */
    NetworkClient.InFlightRequest inFlightRequest = requestQueue(node).pollFirst();
    inFlightRequestCount.decrementAndGet();
    return inFlightRequest;
}
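The deque discipline matters here: in this version of the client, a newly sent request is added at the head of the per-node deque, so peekFirst()/pollFirst() (lastSent/completeLastSent) touch the most recently sent request, while completeNext() takes from the tail, i.e. the oldest in-flight request. A minimal, self-contained sketch of that discipline (illustrative only, not the real InFlightRequests class):

import java.util.ArrayDeque;
import java.util.Deque;

public class InFlightDequeSketch {
    public static void main(String[] args) {
        Deque<String> inFlight = new ArrayDeque<>();
        // send(...) adds at the head
        inFlight.addFirst("request-1");
        inFlight.addFirst("request-2");
        inFlight.addFirst("request-3");

        // lastSent(): most recent send, used by handleCompletedSends
        System.out.println(inFlight.peekFirst());   // request-3
        // completeLastSent(): remove the request that just finished sending
        System.out.println(inFlight.pollFirst());   // request-3
        // completeNext() in the real class takes from the tail, i.e. the oldest
        // in-flight request, which matches the order in which responses arrive
        System.out.println(inFlight.pollLast());    // request-1
    }
}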
Handling completed receives

The connection may be throttled for a while; MetadataResponse and ApiVersionsResponse receive additional handling, while all other response types are turned into entries in the ClientResponse list.

private void handleCompletedReceives(List<ClientResponse> responses, long now) {
	/* get the ArrayList<NetworkReceive> completedReceives field */
    for (NetworkReceive receive : this.selector.completedReceives()) {
    	/* get the String source field */
        String source = receive.source();
        /* take the oldest InFlightRequest out of the queue and decrement inFlightRequestCount */
        InFlightRequest req = inFlightRequests.completeNext(source);
        Struct responseStruct = parseStructMaybeUpdateThrottleTimeMetrics(receive.payload(), req.header,
            throttleTimeSensor, now);
        // If the received response includes a throttle delay, throttle the connection.
        AbstractResponse body = AbstractResponse.
                parseResponse(req.header.apiKey(), responseStruct, req.header.apiVersion());
        /* possibly throttle the connection */
        maybeThrottle(body, req.header.apiVersion(), req.destination, now);
        if (req.isInternalRequest && body instanceof MetadataResponse)
        	/* handle the metadata response */
            metadataUpdater.handleCompletedMetadataResponse(req.header, now, (MetadataResponse) body);
        else if (req.isInternalRequest && body instanceof ApiVersionsResponse)
        	/* handle the ApiVersions response */
            handleApiVersionsResponse(responses, req, now, (ApiVersionsResponse) body);
        else
        	/* build a ClientResponse and add it to the List<ClientResponse> */
            responses.add(req.completed(body, now));
    }
}

If the throttle time carried in a node's response is greater than 0 and the client is the side that should apply the throttling, the connection to that node is throttled until the indicated point in time.
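A rough sketch of that throttling idea, using made-up helper names (throttledUntilMs, isThrottled) rather than the real NetworkClient/ConnectionStates API: the client remembers a per-node deadline and does not treat the node as ready until that deadline has passed.

import java.util.HashMap;
import java.util.Map;

public class ThrottleSketch {
    // node id -> time (ms) until which we must not send to that node
    private final Map<String, Long> throttledUntilMs = new HashMap<>();

    void maybeThrottle(String nodeId, int throttleTimeMs, long nowMs) {
        if (throttleTimeMs > 0) {
            // remember the deadline; readiness checks consult it later
            throttledUntilMs.put(nodeId, nowMs + throttleTimeMs);
        }
    }

    boolean isThrottled(String nodeId, long nowMs) {
        return throttledUntilMs.getOrDefault(nodeId, 0L) > nowMs;
    }

    public static void main(String[] args) {
        ThrottleSketch sketch = new ThrottleSketch();
        long now = System.currentTimeMillis();
        sketch.maybeThrottle("node-1", 500, now);                      // broker asked for 500 ms
        System.out.println(sketch.isThrottled("node-1", now + 100));   // true
        System.out.println(sketch.isThrottled("node-1", now + 600));   // false
    }
}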

Handling disconnected nodes

Update the node's reconnectBackoffMs, log according to the kind of disconnection, and add the constructed ClientResponse objects to the ClientResponse list.
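For reference, the surrounding handleDisconnections loop (not reproduced above) essentially walks the selector's disconnected channels, hands each node to processDisconnection, and then requests a metadata update; a simplified, self-contained sketch under that assumption, with stand-in types:

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class DisconnectionLoopSketch {
    enum ChannelState { NOT_CONNECTED, AUTHENTICATION_FAILED, LOCAL_CLOSE }

    public static void main(String[] args) {
        // what Selector#disconnected() conceptually returns: node id -> channel state
        Map<String, ChannelState> disconnected = new LinkedHashMap<>();
        disconnected.put("node-1", ChannelState.NOT_CONNECTED);
        disconnected.put("node-2", ChannelState.AUTHENTICATION_FAILED);

        List<String> responses = new ArrayList<>();
        long now = System.currentTimeMillis();
        for (Map.Entry<String, ChannelState> entry : disconnected.entrySet()) {
            // the real client calls processDisconnection(responses, nodeId, now, state) here;
            // this sketch only records a marker entry for illustration
            responses.add(entry.getKey() + " disconnected (" + entry.getValue() + ")");
        }
        // losing connections is a hint that the cluster view may be stale
        if (!disconnected.isEmpty())
            System.out.println("metadata update requested; responses: " + responses);
    }
}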

private void processDisconnection(List<ClientResponse> responses,
                                  String nodeId,
                                  long now,
                                  ChannelState disconnectState) {
    /* update the node's reconnectBackoffMs */
    connectionStates.disconnected(nodeId, now);
    apiVersions.remove(nodeId);
    nodesNeedingApiVersionsFetch.remove(nodeId);
    switch (disconnectState.state()) {
        case AUTHENTICATION_FAILED:
            AuthenticationException exception = disconnectState.exception();
            connectionStates.authenticationFailed(nodeId, now, exception);
            metadataUpdater.handleFatalException(exception);     
            break;
        case AUTHENTICATE:
            break;
        case NOT_CONNECTED:
            break;
        default:
            break; 
    }
    for (InFlightRequest request : this.inFlightRequests.clearAll(nodeId)) {
        if (!request.isInternalRequest)
            responses.add(request.disconnected(now, disconnectState.exception()));
        else if (request.header.apiKey() == ApiKeys.METADATA)
            metadataUpdater.handleDisconnection(request.destination);
    }
}
Handling newly established connections

This step mainly sets the node's connection state.

discoverBrokerVersions: True if we should send an ApiVersionRequest when first connecting to a broker

When discoverBrokerVersions is true, the node's connection state is set to CHECKING_API_VERSIONS.

Otherwise the node's connection state is set directly to READY (a simplified sketch of this decision follows).
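A minimal sketch of that decision, with simplified stand-in types (not the real NetworkClient/ClusterConnectionStates):

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class ConnectionSketch {
    enum ConnectionState { CONNECTING, CHECKING_API_VERSIONS, READY }

    private final boolean discoverBrokerVersions;
    private final Map<String, ConnectionState> states = new HashMap<>();
    // nodes for which an ApiVersionsRequest still has to be sent
    private final Set<String> nodesNeedingApiVersionsFetch = new HashSet<>();

    ConnectionSketch(boolean discoverBrokerVersions) {
        this.discoverBrokerVersions = discoverBrokerVersions;
    }

    void handleConnection(String nodeId) {
        if (discoverBrokerVersions) {
            states.put(nodeId, ConnectionState.CHECKING_API_VERSIONS);
            nodesNeedingApiVersionsFetch.add(nodeId);   // picked up later by the ApiVersions step
        } else {
            states.put(nodeId, ConnectionState.READY);
        }
    }

    public static void main(String[] args) {
        ConnectionSketch sketch = new ConnectionSketch(true);
        sketch.handleConnection("node-1");
        System.out.println(sketch.states);  // {node-1=CHECKING_API_VERSIONS}
    }
}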

Handling the initiation of ApiVersions requests

Returns true if the channel has handshake and authentication done.
Returns true if authentication is complete

Determine whether more requests can be sent to the given node (a rough sketch of the whole ApiVersions initiation step follows).
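Putting these checks together, the ApiVersions initiation step can be pictured roughly as below; the names used here (canSendMore set, String stand-in for the request builder) are made up for the illustration and are not the real NetworkClient fields:

import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

public class ApiVersionsInitSketch {
    public static void main(String[] args) {
        // node id -> pending ApiVersions request (a String stands in for ApiVersionsRequest.Builder)
        Map<String, String> nodesNeedingApiVersionsFetch = new LinkedHashMap<>();
        nodesNeedingApiVersionsFetch.put("node-1", "ApiVersionsRequest");
        nodesNeedingApiVersionsFetch.put("node-2", "ApiVersionsRequest");

        // nodes whose channel is ready and that can accept more in-flight requests
        Set<String> canSendMore = Set.of("node-1");

        Iterator<Map.Entry<String, String>> it = nodesNeedingApiVersionsFetch.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<String, String> entry = it.next();
            if (canSendMore.contains(entry.getKey())) {
                // in the real client this is a doSend(...) with isInternalRequest = true
                System.out.println("sending " + entry.getValue() + " to " + entry.getKey());
                it.remove();   // the request is now considered in flight
            }
        }
        System.out.println("still waiting: " + nodesNeedingApiVersionsFetch.keySet());  // [node-2]
    }
}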

private void doSend(ClientRequest clientRequest, boolean isInternalRequest, long now) {
	/* make sure the client state is ACTIVE */
    ensureActive();
    String nodeId = clientRequest.destination();
    if (!isInternalRequest) {
        /* check that the node is connected, ready, and able to accept more requests */
        if (!canSendRequest(nodeId, now))
            throw new IllegalStateException("......");
    }
    AbstractRequest.Builder<?> builder = clientRequest.requestBuilder();
    try {
        NodeApiVersions versionInfo = apiVersions.get(nodeId);
        short version;
        if (versionInfo == null) {
        	/* latestAllowedVersion */
            version = builder.latestAllowedVersion();     
        } else {
        	/* Get the latest version supported by the broker within an allowed range of versions */
            version = versionInfo.latestUsableVersion(clientRequest.apiKey(), builder.oldestAllowedVersion(),
                    builder.latestAllowedVersion());
        }
        doSend(clientRequest, isInternalRequest, now, builder.build(version));
    } catch (UnsupportedVersionException unsupportedVersionException) {
        ClientResponse clientResponse = new ClientResponse(clientRequest.makeHeader(builder.latestAllowedVersion()),
                clientRequest.callback(), clientRequest.destination(), now, now,
                false, unsupportedVersionException, null, null);
        abortedSends.add(clientResponse);
        if (isInternalRequest && clientRequest.apiKey() == ApiKeys.METADATA)
            metadataUpdater.handleFatalException(unsupportedVersionException);
    }
}
private void doSend(ClientRequest clientRequest, boolean isInternalRequest, long now, AbstractRequest request) {
    String destination = clientRequest.destination();
    /* build the RequestHeader instance */
    RequestHeader header = clientRequest.makeHeader(request.version());
    /* build the NetworkSend instance */
    Send send = request.toSend(destination, header);
    InFlightRequest inFlightRequest = new InFlightRequest(
            clientRequest,
            header,
            isInternalRequest,
            request,
            send,
            now);
    this.inFlightRequests.add(inFlightRequest);
    selector.send(send);
}
Handling timed-out requests

NetworkClient:

    private void handleTimedOutRequests(List<ClientResponse> responses, long now) {
    	/* 1. get the ids of the nodes that have timed-out requests */
        List<String> nodeIds = this.inFlightRequests.nodesWithTimedOutRequests(now);
        for (String nodeId : nodeIds) {
            /* 2. close the connection to the node */
            /* selector here is Kafka's own Selector implementation */
            this.selector.close(nodeId);
            /* 3. handle the disconnection of the node */
            processDisconnection(responses, nodeId, now, ChannelState.LOCAL_CLOSE);
        }
        // we disconnected, so we should probably refresh our metadata
        if (!nodeIds.isEmpty())
            /* 4. request a metadata update */
            metadataUpdater.requestUpdate();
    }
	/**
     * Returns a list of nodes with pending in-flight request, that need to be timed out
     */
    public List<String> nodesWithTimedOutRequests(long now) {
        List<String> nodeIds = new ArrayList<>();
        for (Map.Entry<String, Deque<NetworkClient.InFlightRequest>> requestEntry : requests.entrySet()) {
            String nodeId = requestEntry.getKey();
            Deque<NetworkClient.InFlightRequest> deque = requestEntry.getValue();
            if (hasExpiredRequest(now, deque))
                nodeIds.add(nodeId);
        }
        return nodeIds;
    }

	private Boolean hasExpiredRequest(long now, Deque<NetworkClient.InFlightRequest> deque) {
        for (NetworkClient.InFlightRequest request : deque) {
            long timeSinceSend = Math.max(0, now - request.sendTimeMs);
            /* a request whose time since send exceeds request.timeout.ms is considered timed out and must be expired */
            if (timeSinceSend > request.requestTimeoutMs)
                return true;
        }
        return false;
    }

NetworkClient:

    private void processDisconnection(List<ClientResponse> responses,
                                      String nodeId,
                                      long now,
                                      ChannelState disconnectState) {
        /* 3.1 update the node's connection state, the reconnect backoff, and the maximum reconnect backoff */
        connectionStates.disconnected(nodeId, now);
        /* ApiVersions */
        apiVersions.remove(nodeId);
        /* HashMap<String, ApiVersionsRequest.Builder> */
        nodesNeedingApiVersionsFetch.remove(nodeId);
        switch (disconnectState.state()) {
            case AUTHENTICATION_FAILED:
                AuthenticationException exception = disconnectState.exception();
                /* 3.2 update the node's connection state for the authentication failure */
                connectionStates.authenticationFailed(nodeId, now, exception);
                /* 3.3 let the metadata updater handle the fatal exception */
                metadataUpdater.handleFatalException(exception);
                break;
            case AUTHENTICATE:
                break;
            case NOT_CONNECTED:
                break;
            default:
                break; 
        }
        for (InFlightRequest request : this.inFlightRequests.clearAll(nodeId)) {
            log.trace("......");
            if (!request.isInternalRequest)
            	/* 3.4 add a corresponding ClientResponse */
                responses.add(request.disconnected(now, disconnectState.exception()));
            else if (request.header.apiKey() == ApiKeys.METADATA)
            	/* 3.5 let the metadata updater handle the disconnection */
                metadataUpdater.handleDisconnection(request.destination);
        }
    }

3.1

	public void disconnected(String id, long now) {
        NodeConnectionState nodeState = nodeState(id);
        nodeState.state = ConnectionState.DISCONNECTED;
        nodeState.lastConnectAttemptMs = now;
        updateReconnectBackoff(nodeState);
    }

	private NodeConnectionState nodeState(String id) {
		/* Map<String, NodeConnectionState> */
        NodeConnectionState state = this.nodeState.get(id);
        if (state == null)
            throw new IllegalStateException("No entry found for connection " + id);
        return state;
    }

	/**
     * Update the node reconnect backoff exponentially.
     * The delay is reconnect.backoff.ms * 2**(failures - 1) * (+/- 20% random jitter)
     * Up to a (pre-jitter) maximum of reconnect.backoff.max.ms
     */
    private void updateReconnectBackoff(NodeConnectionState nodeState) {
        if (this.reconnectBackoffMaxMs > this.reconnectBackoffInitMs) {
            nodeState.failedAttempts += 1;
            double backoffExp = Math.min(nodeState.failedAttempts - 1, this.reconnectBackoffMaxExp);
            double backoffFactor = Math.pow(RECONNECT_BACKOFF_EXP_BASE, backoffExp);
            long reconnectBackoffMs = (long) (this.reconnectBackoffInitMs * backoffFactor);
            // Actual backoff is randomized to avoid connection storms.
            double randomFactor = ThreadLocalRandom.current().nextDouble(0.8, 1.2);
            nodeState.reconnectBackoffMs = (long) (randomFactor * reconnectBackoffMs);
        }
    }
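A small worked example of the backoff formula in the javadoc above, assuming the default values reconnect.backoff.ms = 50 and reconnect.backoff.max.ms = 1000; the exponent is capped so that the pre-jitter delay never exceeds the configured maximum.

import java.util.concurrent.ThreadLocalRandom;

public class ReconnectBackoffExample {
    public static void main(String[] args) {
        long backoffInitMs = 50L;      // reconnect.backoff.ms
        long backoffMaxMs = 1000L;     // reconnect.backoff.max.ms
        double maxExp = Math.log(backoffMaxMs / (double) Math.max(backoffInitMs, 1)) / Math.log(2);

        for (int failedAttempts = 1; failedAttempts <= 6; failedAttempts++) {
            double exp = Math.min(failedAttempts - 1, maxExp);
            long preJitter = (long) (backoffInitMs * Math.pow(2, exp));   // 50, 100, 200, 400, 800, ~1000 (capped)
            double jitter = ThreadLocalRandom.current().nextDouble(0.8, 1.2);
            System.out.printf("attempt %d -> ~%d ms (pre-jitter %d ms)%n",
                    failedAttempts, (long) (preJitter * jitter), preJitter);
        }
    }
}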

3.2

	public void authenticationFailed(String id, long now, AuthenticationException exception) {
        NodeConnectionState nodeState = nodeState(id);
        nodeState.authenticationException = exception;
        nodeState.state = ConnectionState.AUTHENTICATION_FAILED;
        nodeState.lastConnectAttemptMs = now;
        updateReconnectBackoff(nodeState);
    }

3.3

		@Override
        public void handleFatalException(KafkaException fatalException) {
            if (metadata.updateRequested())
                metadata.failedUpdate(time.milliseconds(), fatalException);
            inProgressRequestVersion = null;
        }

		public synchronized boolean updateRequested() {
        	return this.needUpdate;
    	}

		public synchronized void failedUpdate(long now, KafkaException fatalException) {
        	this.lastRefreshMs = now;
        	this.fatalException = fatalException;
    	}

3.4

		public ClientResponse disconnected(long timeMs, AuthenticationException authenticationException) {
            return new ClientResponse(header, callback, destination, createdTimeMs, timeMs,
                    true, null, authenticationException, null);
        }

3.5

		@Override
        public void handleDisconnection(String destination) {
            Cluster cluster = metadata.fetch();
            if (cluster.isBootstrapConfigured()) {
                int nodeId = Integer.parseInt(destination);
                Node node = cluster.nodeById(nodeId);
                if (node != null)
                    log.warn("Bootstrap broker {} disconnected", node);
            }
            inProgressRequestVersion = null;
        }

NetworkClient:

		@Override
        public void requestUpdate() {
        	/* Metadata */
            this.metadata.requestUpdate();
        }

Metadata:

	/**
     * Request an update of the current cluster metadata info, return the current updateVersion before the update
     */
    public synchronized int requestUpdate() {
        this.needUpdate = true;
        return this.updateVersion;
    }
completeResponses(…) - finishing the response handling

Main steps: record metrics, then complete or retry the given batches.

NetworkClient:

	private void completeResponses(List<ClientResponse> responses) {
        for (ClientResponse response : responses) {
            try {
                response.onComplete();
            } catch (Exception e) {
                log.error("Uncaught error in request completion:", e);
            }
        }
    }

ClientResponse:

	public void onComplete() {
        if (callback != null)
            callback.onComplete(this);
    }

Sender:

	/**
     * Handle a produce response - record metrics, then complete or retry the given batches
     */
    private void handleProduceResponse(ClientResponse response, Map<TopicPartition, ProducerBatch> batches, long now) {
        RequestHeader requestHeader = response.requestHeader();
        long receivedTimeMs = response.receivedTimeMs();
        int correlationId = requestHeader.correlationId();
        if (response.wasDisconnected()) {
            for (ProducerBatch batch : batches.values())
                completeBatch(batch, new ProduceResponse.PartitionResponse(Errors.NETWORK_EXCEPTION), correlationId, now, 0L);
        } else if (response.versionMismatch() != null) {
            for (ProducerBatch batch : batches.values())
                completeBatch(batch, new ProduceResponse.PartitionResponse(Errors.UNSUPPORTED_VERSION), correlationId, now, 0L);
        } else { 
            if (response.hasResponse()) {
                ProduceResponse produceResponse = (ProduceResponse) response.responseBody();
                for (Map.Entry<TopicPartition, ProduceResponse.PartitionResponse> entry : produceResponse.responses().entrySet()) {
                    TopicPartition tp = entry.getKey();
                    ProduceResponse.PartitionResponse partResp = entry.getValue();
                    ProducerBatch batch = batches.get(tp);
                    completeBatch(batch, partResp, correlationId, now, receivedTimeMs + produceResponse.throttleTimeMs());
                }
                /* 1. record some metrics */
                this.sensors.recordLatency(response.destination(), response.requestLatencyMs());
            } else {
                // this is the acks = 0 case, just complete all requests
                for (ProducerBatch batch : batches.values()) {
                	/* 2. complete or retry the given batch */
                    completeBatch(batch, new ProduceResponse.PartitionResponse(Errors.NONE), correlationId, now, 0L);
                }
            }
        }
    }
  1. Record the relevant metrics
		public void recordLatency(String node, long latency) {
            long now = time.milliseconds();
            this.requestTimeSensor.record(latency, now);
            if (!node.isEmpty()) {
                String nodeTimeName = "node-" + node + ".latency";
                Sensor nodeRequestTime = this.metrics.getSensor(nodeTimeName);
                if (nodeRequestTime != null)
                    nodeRequestTime.record(latency, now);
            }
        }
	public void record(double value, long timeMs, boolean checkQuotas) {
        if (shouldRecord()) {
            this.lastRecordTime = timeMs;
            synchronized (this) {
                synchronized (metricLock()) {
                    for (Stat stat : this.stats)
                        stat.record(config, value, timeMs);
                }
                if (checkQuotas)
                    checkQuotas(timeMs);
            }
            for (Sensor parent : parents)
                parent.record(value, timeMs, checkQuotas);
        }
    }
  2. Complete or retry the given batch
	/**
     * Complete or retry the given batch of records.
     */
    private void completeBatch(ProducerBatch batch, ProduceResponse.PartitionResponse response, long correlationId,
                               long now, long throttleUntilTimeMs) {
        Errors error = response.error;

        if (error == Errors.MESSAGE_TOO_LARGE && batch.recordCount > 1 && !batch.isDone() &&
                (batch.magic() >= RecordBatch.MAGIC_VALUE_V2 || batch.isCompressed())) {
          
            if (transactionManager != null)
            	/* remove the in-flight batch from the TransactionManager */
                transactionManager.removeInFlightBatch(batch);
            this.accumulator.splitAndReenqueue(batch);
            /* remove the ProducerBatch and release the memory it occupies */
            maybeRemoveAndDeallocateBatch(batch);
            this.sensors.recordBatchSplit();
        } else if (error != Errors.NONE) {
        	/* check whether the batch can be retried */
            if (canRetry(batch, response, now)) {
                if (transactionManager == null) {
                	/* put the batch back into the queue */
                    reenqueueBatch(batch, now);
                /* check whether the ProducerIdAndEpoch's producerId and epoch match the batch's */
                } else if (transactionManager.hasProducerIdAndEpoch(batch.producerId(), batch.producerEpoch())) {
                    /* put the batch back into the queue */
                    reenqueueBatch(batch, now);
                } else {
                	/* handle the batch as failed */
                    failBatch(batch, response, new OutOfOrderSequenceException("......"), false);
                }
            /* duplicate sequence number */
            } else if (error == Errors.DUPLICATE_SEQUENCE_NUMBER) {
            	/* handle the batch as completed */
                completeBatch(batch, response);
            } else {
                final RuntimeException exception;
                if (error == Errors.TOPIC_AUTHORIZATION_FAILED)
                    exception = new TopicAuthorizationException(batch.topicPartition.topic());
                else if (error == Errors.CLUSTER_AUTHORIZATION_FAILED)
                    exception = new ClusterAuthorizationException("The producer is not authorized to do idempotent sends");
                else
                    exception = error.exception();
                /* handle the batch as failed */
                failBatch(batch, response, exception, batch.attempts() < this.retries);
            }
            if (error.exception() instanceof InvalidMetadataException) {       
                metadata.requestUpdate();
            }
        } else {
        	/* handle the batch as completed */
            completeBatch(batch, response);
        }

        /* indicate whether the producer should guarantee the message order on the broker or not */
        if (guaranteeMessageOrder)
        	/* put the partition into the HashMap<TopicPartition, Long> */
            this.accumulator.unmutePartition(batch.topicPartition, throttleUntilTimeMs);
    }

Re-enqueuing a batch

private void reenqueueBatch(ProducerBatch batch, long currentTimeMs) {
	/* put the batch back into the accumulator's queue */
    this.accumulator.reenqueue(batch, currentTimeMs);
    /* remove the batch from the in-flight batches */
    maybeRemoveFromInflightBatches(batch);
    this.sensors.recordRetries(batch.topicPartition.topic(), batch.recordCount);
}

public void reenqueue(ProducerBatch batch, long now) {
    batch.reenqueued(now);
    Deque<ProducerBatch> deque = getOrCreateDeque(batch.topicPartition);
    synchronized (deque) {
        if (transactionManager != null)
        	/* insert the batch into the deque in sequence-number order */
            insertInSequenceOrder(deque, batch);
        else
            deque.addFirst(batch);
    }
}

void reenqueued(long now) {
    attempts.getAndIncrement();
    lastAttemptMs = Math.max(lastAppendTime, now);
    lastAppendTime = Math.max(lastAppendTime, now);
    retry = true;
}
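The insertInSequenceOrder call above keeps the re-enqueued batch in ascending base-sequence order instead of blindly putting it at the head of the deque; a simplified illustration (integers stand in for batches, this is not the real method):

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;

public class SequenceReenqueueSketch {
    static void insertInSequenceOrder(Deque<Integer> deque, int baseSequence) {
        List<Integer> drained = new ArrayList<>();
        // pop batches with a smaller base sequence; they must stay ahead of the retried batch
        while (!deque.isEmpty() && deque.peekFirst() < baseSequence)
            drained.add(deque.pollFirst());
        deque.addFirst(baseSequence);
        for (int i = drained.size() - 1; i >= 0; i--)
            deque.addFirst(drained.get(i));
    }

    public static void main(String[] args) {
        Deque<Integer> deque = new ArrayDeque<>(List.of(3, 7, 9));  // base sequences already queued
        insertInSequenceOrder(deque, 5);                            // retried batch with base sequence 5
        System.out.println(deque);                                  // [3, 5, 7, 9]
    }
}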

failBatch(…) - handling a failed batch

private void failBatch(ProducerBatch batch,
                       ProduceResponse.PartitionResponse response,
                       RuntimeException exception,
                       boolean adjustSequenceNumbers) {
    failBatch(batch, response.baseOffset, response.logAppendTime, exception, adjustSequenceNumbers);
}

private void failBatch(ProducerBatch batch,
                       long baseOffset,
                       long logAppendTime,
                       RuntimeException exception,
                       boolean adjustSequenceNumbers) {
    if (transactionManager != null) {
        transactionManager.handleFailedBatch(batch, exception, adjustSequenceNumbers);
    }

    this.sensors.recordErrors(batch.topicPartition.topic(), batch.recordCount);
	/* batch.done(): atomically set the final state, fill in the ProduceRequestResult, and trigger the Thunk callbacks; returns true on success */
    if (batch.done(baseOffset, logAppendTime, exception)) {
    	/* remove the ProducerBatch and release the memory it occupies */
        maybeRemoveAndDeallocateBatch(batch);
    }
}
/* TransactionManager */
public synchronized void handleFailedBatch(ProducerBatch batch, RuntimeException exception, boolean adjustSequenceNumbers) {
	/* propagate the error state */
    maybeTransitionToErrorState(exception);
	/* check whether the ProducerIdAndEpoch's producerId and epoch match the batch's */
    if (!hasProducerIdAndEpoch(batch.producerId(), batch.producerEpoch())) {
        return;
    }

    if (exception instanceof OutOfOrderSequenceException && !isTransactional()) {
		/* reset the ProducerIdAndEpoch */
        resetProducerId();
    } else {
    	/* remove the in-flight batch */
        removeInFlightBatch(batch);
        if (adjustSequenceNumbers)
        	/* adjust the sequence numbers to account for the failed batch */
            adjustSequencesDueToFailedBatch(batch);
    }
}

Determining whether a retry is possible

synchronized boolean canRetry(ProduceResponse.PartitionResponse response, ProducerBatch batch) {
	/* check whether the ProducerIdAndEpoch's producerId and epoch match the batch's */
    if (!hasProducerIdAndEpoch(batch.producerId(), batch.producerEpoch()))
        return false;
    Errors error = response.error;
    /* hasUnresolvedSequence: the Set<TopicPartition> of unresolved partitions contains this partition; isNextSequence: sequence - lastAckedSequence(topicPartition).orElse(NO_LAST_ACKED_SEQUENCE_NUMBER) == 1 */
    if (error == Errors.OUT_OF_ORDER_SEQUENCE_NUMBER && !hasUnresolvedSequence(batch.topicPartition) &&
            (batch.sequenceHasBeenReset() || !isNextSequence(batch.topicPartition, batch.baseSequence())))
        return true;
    if (error == Errors.UNKNOWN_PRODUCER_ID) {
        if (response.logStartOffset == -1)
            return true;
        /* reopened: the batch's sequence has been reset */
        if (batch.sequenceHasBeenReset()) {
            return true;
        } else if (lastAckedOffset(batch.topicPartition).orElse(NO_LAST_ACKED_SEQUENCE_NUMBER) < response.logStartOffset) {
        	/* restart the sequence numbers from the beginning */
            startSequencesAtBeginning(batch.topicPartition);
            return true;
        }
    }
    return false;
}

/* reset the sequence numbers to start from the beginning */
private void startSequencesAtBeginning(TopicPartition topicPartition) {
    final AtomicInteger sequence = new AtomicInteger(0);
    topicPartitionBookkeeper.getPartition(topicPartition).resetSequenceNumbers(inFlightBatch -> {  
        inFlightBatch.resetProducerState(new ProducerIdAndEpoch(inFlightBatch.producerId(),
                inFlightBatch.producerEpoch()), sequence.get(), inFlightBatch.isTransactional());
        sequence.getAndAdd(inFlightBatch.recordCount);
    });
    setNextSequence(topicPartition, sequence.get());
    topicPartitionBookkeeper.getPartition(topicPartition).lastAckedSequence = NO_LAST_ACKED_SEQUENCE_NUMBER;
}
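As a small illustration of startSequencesAtBeginning with made-up numbers: if two batches with 3 and 2 records are still in flight for a partition, they are re-assigned base sequences 0 and 3 in send order, and the partition's next sequence becomes 5.

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

public class ResetSequencesExample {
    public static void main(String[] args) {
        // in-flight batches for one partition, in send order -> record counts (illustrative only)
        Map<String, Integer> inFlightBatches = new LinkedHashMap<>();
        inFlightBatches.put("batch-1", 3);
        inFlightBatches.put("batch-2", 2);

        AtomicInteger sequence = new AtomicInteger(0);
        for (Map.Entry<String, Integer> e : inFlightBatches.entrySet()) {
            System.out.println(e.getKey() + " gets base sequence " + sequence.get()); // 0, then 3
            sequence.getAndAdd(e.getValue());
        }
        System.out.println("next sequence for the partition: " + sequence.get());     // 5
    }
}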

completeBatch(…) - handling a completed batch

	private void completeBatch(ProducerBatch batch, ProduceResponse.PartitionResponse response) {
        if (transactionManager != null) {
        	/* check whether the ProducerIdAndEpoch's producerId and epoch match the batch's; if not, log at debug level and return */
        	/* update lastAckedSequence */
        	/* update lastAckedOffset */
        	/* remove the in-flight batch */
            transactionManager.handleCompletedBatch(batch, response);
        }
		/* batch.done(): atomically set the final state, fill in the ProduceRequestResult, and trigger the Thunk callbacks; returns true on success */
        if (batch.done(response.baseOffset, response.logAppendTime, null)) {
        	/* remove the ProducerBatch and release the memory it occupies */
            maybeRemoveAndDeallocateBatch(batch);
        }
    }