Flink's processing logic consists of:
1: the transform operator type (the map, filter, etc. transformations we specify);
2: the operator (the concrete business logic in the Function we implement);
3: the partition (how the output of one operator is divided up and handed to the next operator).
StreamPartitioner is the interface behind partitioning; to control how data is transferred, you implement its two methods:
1: copy();
2: int[] selectChannels(T record, int numChannels);
A channel is the conduit that carries data to the downstream operator, identified by an index; selectChannels decides which channel(s) each record is routed over.
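A minimal sketch of a custom implementation against this interface (the even/odd routing rule and the class name EvenOddPartitioner are invented for illustration; the signatures mirror the HashPartitioner quoted later in this article):

import org.apache.flink.runtime.plugable.SerializationDelegate;
import org.apache.flink.streaming.runtime.partitioner.StreamPartitioner;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

// Sketch: route even values to channel 0 and odd values to channel 1.
// The routing rule is hypothetical; the structure mirrors HashPartitioner below.
public class EvenOddPartitioner extends StreamPartitioner<Integer> {
	private static final long serialVersionUID = 1L;
	// Reused on every call, like HashPartitioner's returnArray.
	private final int[] returnArray = new int[1];

	@Override
	public int[] selectChannels(SerializationDelegate<StreamRecord<Integer>> record,
			int numChannels) {
		int value = record.getInstance().getValue();
		// Modulo numChannels keeps the sketch valid even with one output channel.
		returnArray[0] = (value & 1) % numChannels;
		return returnArray;
	}

	@Override
	public StreamPartitioner<Integer> copy() {
		return this; // stateless, so the same instance can be shared
	}
}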
keyBy is handled by wrapping the DataStream; it groups records by hashing the key's value and taking it modulo the channel count. The other partitioning strategies are likewise wrapped inside the DataStream object, and you can supply your own via partitionCustom, although the CustomPartitionerWrapper implementation restricts you to selecting a single channel (see the sketch after this list). The built-in strategies include:
1: shuffle, which sends each record to a randomly chosen downstream instance;
2: broadcast, which sends every record to all downstream instances once;
3: plus various less commonly used strategies.
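A sketch of partitionCustom under that constraint (the hash-based routing rule and the class name CustomPartitionDemo are illustrative; note partition() returns exactly one channel index, which is the single-channel restriction just mentioned):

import org.apache.flink.api.common.functions.Partitioner;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class CustomPartitionDemo {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env =
				StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(2);

		env.fromElements(
				new Tuple2<String, Integer>("flink", 1),
				new Tuple2<String, Integer>("storm", 1))
			// partition(key, numPartitions) returns exactly ONE channel index --
			// the single-channel restriction imposed by CustomPartitionerWrapper.
			.partitionCustom(new Partitioner<String>() {
				@Override
				public int partition(String key, int numPartitions) {
					return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
				}
			}, 0) // route by field 0, the word
			.printToErr();

		env.execute();
	}
}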
Any repartitioning of data involves serialization and network transfer.
Flink serializes common types with TypeSerializer, which draws on sun's Unsafe class and Flink's own memory-management implementation; types that TypeSerializer cannot handle fall back to Kryo.
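A round-trip sketch of that serializer path. Assumptions: the DataOutputSerializer and DataInputDeserializer helpers are taken from org.apache.flink.runtime.util, and their package and accessors have moved between Flink versions, so treat this as illustrative rather than definitive:

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.runtime.util.DataInputDeserializer;
import org.apache.flink.runtime.util.DataOutputSerializer;

public class SerializerRoundTrip {
	public static void main(String[] args) throws Exception {
		// The serializer Flink picks for String; a type the TypeExtractor
		// cannot analyze would instead get a Kryo-backed generic serializer.
		TypeSerializer<String> serializer =
				BasicTypeInfo.STRING_TYPE_INFO.createSerializer(new ExecutionConfig());

		DataOutputSerializer out = new DataOutputSerializer(64);
		serializer.serialize("hello flink", out);

		DataInputDeserializer in = new DataInputDeserializer(out.wrapAsByteBuffer());
		System.out.println(serializer.deserialize(in)); // prints "hello flink"
	}
}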
Flink's networking is built on Akka, and much of the network-related module is written in Scala.
Each record is wrapped in a SerializationDelegate before being handed to the writer.
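A condensed sketch of that wrapping step, modeled on RecordWriterOutput's per-record path (simplified; the real class also handles watermark broadcasting and object reuse, and the names EmitSketch/emitWrapped are hypothetical):

import org.apache.flink.runtime.io.network.api.writer.RecordWriter;
import org.apache.flink.runtime.plugable.SerializationDelegate;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

public class EmitSketch {
	// Wrap the value in a StreamRecord, hand it to the delegate, and let the
	// RecordWriter (quoted below) serialize it into per-channel buffers.
	static <T> void emitWrapped(
			RecordWriter<SerializationDelegate<StreamRecord<T>>> recordWriter,
			SerializationDelegate<StreamRecord<T>> delegate,
			T value) throws Exception {
		delegate.setInstance(new StreamRecord<T>(value));
		recordWriter.emit(delegate);
	}
}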
Each logical partition is backed by one ResultPartition object. A RecordWriter holds a ResultPartitionWriter and chooses which channel to write to; each task holds a RecordWriterOutput, and each RecordWriterOutput holds a RecordWriter. The business-logic Function we write holds a Collector, and calling collect() on it indirectly drives that RecordWriterOutput transfer path.
Records are first written into the ResultSubpartition objects of the ResultPartition. ResultSubpartition has two implementations: SpillableSubpartition, which uses the iomanager module's RequestQueue (a LinkedBlockingQueue), and PipelinedSubpartition, which uses a java.util.ArrayDeque.
ResultPartitionConsumableNotifier.notifyPartitionConsumable then notifies the downstream consumers of this ResultPartition that data is available; the notification travels over Akka, via ActorGateway.ask. The RecordWriter source below shows how records are serialized into per-channel buffers:
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.io.network.api.writer;
import org.apache.flink.core.io.IOReadableWritable;
import org.apache.flink.runtime.accumulators.AccumulatorRegistry;
import org.apache.flink.runtime.event.AbstractEvent;
import org.apache.flink.runtime.io.network.api.serialization.RecordSerializer;
import org.apache.flink.runtime.io.network.api.serialization.SpanningRecordSerializer;
import org.apache.flink.runtime.io.network.buffer.Buffer;
import java.io.IOException;
import static org.apache.flink.runtime.io.network.api.serialization.RecordSerializer.SerializationResult;
/**
* A record-oriented runtime result writer.
* <p>
* The RecordWriter wraps the runtime's {@link ResultPartitionWriter} and takes care of
* serializing records into buffers.
* <p>
* <strong>Important</strong>: it is necessary to call {@link #flush()} after
* all records have been written with {@link #emit(IOReadableWritable)}. This
* ensures that all produced records are written to the output stream (incl.
* partially filled ones).
*
* @param <T> the type of the record that can be emitted with this record writer
*/
public class RecordWriter<T extends IOReadableWritable> {
protected final ResultPartitionWriter writer;
private final ChannelSelector<T> channelSelector;
private final int numChannels;
/** {@link RecordSerializer} per outgoing channel */
private final RecordSerializer<T>[] serializers;
public RecordWriter(ResultPartitionWriter writer) {
this(writer, new RoundRobinChannelSelector<T>());
}
@SuppressWarnings("unchecked")
public RecordWriter(ResultPartitionWriter writer, ChannelSelector<T> channelSelector) {
this.writer = writer;
this.channelSelector = channelSelector;
this.numChannels = writer.getNumberOfOutputChannels();
/**
* The runtime exposes a channel abstraction for the produced results
* (see {@link ChannelSelector}). Every channel has an independent
* serializer.
*/
this.serializers = new SpanningRecordSerializer[numChannels];
for (int i = 0; i < numChannels; i++) {
serializers[i] = new SpanningRecordSerializer<T>();
}
}
public void emit(T record) throws IOException, InterruptedException {
for (int targetChannel : channelSelector.selectChannels(record, numChannels)) {
// serialize with corresponding serializer and send full buffer
RecordSerializer<T> serializer = serializers[targetChannel];
synchronized (serializer) {
SerializationResult result = serializer.addRecord(record);
while (result.isFullBuffer()) {
Buffer buffer = serializer.getCurrentBuffer();
if (buffer != null) {
writeBuffer(buffer, targetChannel, serializer);
}
buffer = writer.getBufferProvider().requestBufferBlocking();
result = serializer.setNextBuffer(buffer);
}
}
}
}
/**
* This is used to broadcast Streaming Watermarks in-band with records. This ignores
* the {@link ChannelSelector}.
*/
public void broadcastEmit(T record) throws IOException, InterruptedException {
for (int targetChannel = 0; targetChannel < numChannels; targetChannel++) {
// serialize with corresponding serializer and send full buffer
RecordSerializer<T> serializer = serializers[targetChannel];
synchronized (serializer) {
SerializationResult result = serializer.addRecord(record);
while (result.isFullBuffer()) {
Buffer buffer = serializer.getCurrentBuffer();
if (buffer != null) {
writeBuffer(buffer, targetChannel, serializer);
}
buffer = writer.getBufferProvider().requestBufferBlocking();
result = serializer.setNextBuffer(buffer);
}
}
}
}
public void broadcastEvent(AbstractEvent event) throws IOException, InterruptedException {
for (int targetChannel = 0; targetChannel < numChannels; targetChannel++) {
RecordSerializer<T> serializer = serializers[targetChannel];
synchronized (serializer) {
if (serializer.hasData()) {
Buffer buffer = serializer.getCurrentBuffer();
if (buffer == null) {
throw new IllegalStateException("Serializer has data but no buffer.");
}
writeBuffer(buffer, targetChannel, serializer);
writer.writeEvent(event, targetChannel);
buffer = writer.getBufferProvider().requestBufferBlocking();
serializer.setNextBuffer(buffer);
}
else {
writer.writeEvent(event, targetChannel);
}
}
}
}
public void sendEndOfSuperstep() throws IOException, InterruptedException {
for (int targetChannel = 0; targetChannel < numChannels; targetChannel++) {
RecordSerializer<T> serializer = serializers[targetChannel];
synchronized (serializer) {
Buffer buffer = serializer.getCurrentBuffer();
if (buffer != null) {
writeBuffer(buffer, targetChannel, serializer);
buffer = writer.getBufferProvider().requestBufferBlocking();
serializer.setNextBuffer(buffer);
}
}
}
writer.writeEndOfSuperstep();
}
public void flush() throws IOException {
for (int targetChannel = 0; targetChannel < numChannels; targetChannel++) {
RecordSerializer<T> serializer = serializers[targetChannel];
synchronized (serializer) {
try {
Buffer buffer = serializer.getCurrentBuffer();
if (buffer != null) {
writeBuffer(buffer, targetChannel, serializer);
}
} finally {
serializer.clear();
}
}
}
}
public void clearBuffers() {
for (RecordSerializer<?> serializer : serializers) {
synchronized (serializer) {
try {
Buffer buffer = serializer.getCurrentBuffer();
if (buffer != null) {
buffer.recycle();
}
}
finally {
serializer.clear();
}
}
}
}
/**
* Counter for the number of records emitted and the records processed.
*/
public void setReporter(AccumulatorRegistry.Reporter reporter) {
for(RecordSerializer<?> serializer : serializers) {
serializer.setReporter(reporter);
}
}
/**
* Writes the buffer to the {@link ResultPartitionWriter}.
*
* <p> The buffer is cleared from the serializer state after a call to this method.
*/
private void writeBuffer(
Buffer buffer,
int targetChannel,
RecordSerializer<T> serializer) throws IOException {
try {
writer.writeBuffer(buffer, targetChannel);
}
finally {
serializer.clearCurrentBuffer();
}
}
}
Next, a minimal streaming job exercising the shuffle/global/broadcast strategies discussed above (MemSource and MemSink are custom in-memory source/sink classes from this project):
package com.alibaba.flink.train.streaming;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
public class HelloWorld {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment
				.getExecutionEnvironment();
		env.setParallelism(5); // parallelism
		DataStream<String> dataStream = env
				.readTextFile("D:/flinkdata/helloworld"); // 1: lines such as "flink storm", "hadoop hive"
		dataStream = env.addSource(new MemSource()); // swap in the custom in-memory source
		dataStream
				.shuffle() // random distribution
				// .global() // send every record to the first downstream instance
				// .broadcast() // send every record to all instances: output volume = records x setParallelism(5)
				.flatMap(
						new FlatMapFunction<String, Tuple2<String, Integer>>() {
							@Override
							public void flatMap(String input,
									Collector<Tuple2<String, Integer>> collector)
									throws Exception {
								String[] objs = input.split(" ");
								for (String obj : objs) {
									// key point: position 0 holds the word, position 1 the count
									collector.collect(new Tuple2<String, Integer>(obj, 1));
								}
							}
						}) // 2: (flink 1) (storm 1)
				.keyBy(0) // 3: partition by the value at position 0
				.sum(1) // (flink:8) (storm:5) -- sum the value at position 1
				// .flatMap(new SumSingleValueStateFunction()) // per-key running sum
				.addSink(new MemSink());
				// .printToErr();
		env.execute(); // launch the job; blocks until the job terminates
	}
}
The HashPartitioner that backs keyBy — the murmur hash of the key modulo the channel count:
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.runtime.partitioner;
import org.apache.flink.annotation.Internal;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.runtime.plugable.SerializationDelegate;
import org.apache.flink.runtime.util.MathUtils;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
/**
* Partitioner selects the target channel based on the hash value of a key from a
* {@link KeySelector}.
*
* @param <T> Type of the elements in the Stream being partitioned
*/
@Internal
public class HashPartitioner<T> extends StreamPartitioner<T> {
private static final long serialVersionUID = 1L;
private int[] returnArray = new int[1];
KeySelector<T, ?> keySelector;
public HashPartitioner(KeySelector<T, ?> keySelector) {
this.keySelector = keySelector;
}
@Override
public int[] selectChannels(SerializationDelegate<StreamRecord<T>> record,
int numberOfOutputChannels) {
Object key;
try {
key = keySelector.getKey(record.getInstance().getValue());
} catch (Exception e) {
throw new RuntimeException("Could not extract key from " + record.getInstance().getValue(), e);
}
returnArray[0] = MathUtils.murmurHash(key.hashCode()) % numberOfOutputChannels;
return returnArray;
}
@Override
public StreamPartitioner<T> copy() {
return this;
}
@Override
public String toString() {
return "HASH";
}
}
The TypeSerializer contract implemented by Flink's own serialization stack:
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.typeutils;
import java.io.IOException;
import java.io.Serializable;
import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputView;
/**
* This interface describes the methods that are required for a data type to be handled by the pact
* runtime. Specifically, this interface contains the serialization and copying methods.
* <p>
* The methods in this class are assumed to be stateless, such that it is effectively thread safe. Stateful
* implementations of the methods may lead to unpredictable side effects and will compromise both stability and
* correctness of the program.
*
* @param <T> The data type that the serializer serializes.
*/
@PublicEvolving
public abstract class TypeSerializer<T> implements Serializable {
private static final long serialVersionUID = 1L;
// --------------------------------------------------------------------------------------------
// General information about the type and the serializer
// --------------------------------------------------------------------------------------------
/**
* Gets whether the type is an immutable type.
*
* @return True, if the type is immutable.
*/
public abstract boolean isImmutableType();
/**
* Creates a deep copy of this serializer if it is necessary, i.e. if it is stateful. This
* can return itself if the serializer is not stateful.
*
* We need this because Serializers might be used in several threads. Stateless serializers
* are inherently thread-safe while stateful serializers might not be thread-safe.
*/
public abstract TypeSerializer<T> duplicate();
// --------------------------------------------------------------------------------------------
// Instantiation & Cloning
// --------------------------------------------------------------------------------------------
/**
* Creates a new instance of the data type.
*
* @return A new instance of the data type.
*/
public abstract T createInstance();
/**
* Creates a deep copy of the given element in a new element.
*
* @param from The element reuse be copied.
* @return A deep copy of the element.
*/
public abstract T copy(T from);
/**
* Creates a copy from the given element.
* The method makes an attempt to store the copy in the given reuse element, if the type is mutable.
* This is, however, not guaranteed.
*
* @param from The element to be copied.
* @param reuse The element to be reused. May or may not be used.
* @return A deep copy of the element.
*/
public abstract T copy(T from, T reuse);
// --------------------------------------------------------------------------------------------
/**
* Gets the length of the data type, if it is a fix length data type.
*
* @return The length of the data type, or <code>-1</code> for variable length data types.
*/
public abstract int getLength();
// --------------------------------------------------------------------------------------------
/**
* Serializes the given record to the given target output view.
*
* @param record The record to serialize.
* @param target The output view to write the serialized data to.
*
* @throws IOException Thrown, if the serialization encountered an I/O related error. Typically raised by the
* output view, which may have an underlying I/O channel to which it delegates.
*/
public abstract void serialize(T record, DataOutputView target) throws IOException;
/**
* De-serializes a record from the given source input view.
*
* @param source The input view from which to read the data.
* @return The deserialized element.
*
* @throws IOException Thrown, if the de-serialization encountered an I/O related error. Typically raised by the
* input view, which may have an underlying I/O channel from which it reads.
*/
public abstract T deserialize(DataInputView source) throws IOException;
/**
* De-serializes a record from the given source input view into the given reuse record instance if mutable.
*
* @param reuse The record instance into which to de-serialize the data.
* @param source The input view from which to read the data.
* @return The deserialized element.
*
* @throws IOException Thrown, if the de-serialization encountered an I/O related error. Typically raised by the
* input view, which may have an underlying I/O channel from which it reads.
*/
public abstract T deserialize(T reuse, DataInputView source) throws IOException;
/**
* Copies exactly one record from the source input view to the target output view. Whether this operation
* works on binary data or partially de-serializes the record to determine its length (such as for records
* of variable length) is up to the implementer. Binary copies are typically faster. A copy of a record containing
* two integer numbers (8 bytes total) is most efficiently implemented as
* {@code target.write(source, 8);}.
*
* @param source The input view from which to read the record.
* @param target The target output view to which to write the record.
*
* @throws IOException Thrown if any of the two views raises an exception.
*/
public abstract void copy(DataInputView source, DataOutputView target) throws IOException;
public abstract boolean equals(Object obj);
/**
* Returns true if the given object can be equaled with this object. If not, it returns false.
*
* @param obj Object which wants to take part in the equality relation
* @return true if obj can be equaled with this, otherwise false
*/
public abstract boolean canEqual(Object obj);
public abstract int hashCode();
}
And the ResultPartition that holds the serialized buffers and triggers the consumable notification:
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.io.network.partition;
import org.apache.flink.runtime.executiongraph.IntermediateResultPartition;
import org.apache.flink.runtime.io.disk.iomanager.IOManager;
import org.apache.flink.runtime.io.disk.iomanager.IOManager.IOMode;
import org.apache.flink.runtime.io.network.buffer.Buffer;
import org.apache.flink.runtime.io.network.buffer.BufferPool;
import org.apache.flink.runtime.io.network.buffer.BufferPoolOwner;
import org.apache.flink.runtime.io.network.buffer.BufferProvider;
import org.apache.flink.runtime.io.network.partition.consumer.LocalInputChannel;
import org.apache.flink.runtime.io.network.partition.consumer.RemoteInputChannel;
import org.apache.flink.runtime.jobgraph.DistributionPattern;
import org.apache.flink.api.common.JobID;
import org.apache.flink.runtime.taskmanager.TaskManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkElementIndex;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
/**
* A result partition for data produced by a single task.
*
* <p> This class is the runtime part of a logical {@link IntermediateResultPartition}. Essentially,
* a result partition is a collection of {@link Buffer} instances. The buffers are organized in one
* or more {@link ResultSubpartition} instances, which further partition the data depending on the
* number of consuming tasks and the data {@link DistributionPattern}.
*
* <p> Tasks, which consume a result partition have to request one of its subpartitions. The request
* happens either remotely (see {@link RemoteInputChannel}) or locally (see {@link LocalInputChannel})
*
* <h2>Life-cycle</h2>
*
* The life-cycle of each result partition has three (possibly overlapping) phases:
* <ol>
* <li><strong>Produce</strong>: </li>
* <li><strong>Consume</strong>: </li>
* <li><strong>Release</strong>: </li>
* </ol>
*
* <h2>Lazy deployment and updates of consuming tasks</h2>
*
* Before a consuming task can request the result, it has to be deployed. The time of deployment
* depends on the PIPELINED vs. BLOCKING characteristic of the result partition. With pipelined
* results, receivers are deployed as soon as the first buffer is added to the result partition.
* With blocking results on the other hand, receivers are deployed after the partition is finished.
*
* <h2>Buffer management</h2>
*
* <h2>State management</h2>
*/
public class ResultPartition implements BufferPoolOwner {
private static final Logger LOG = LoggerFactory.getLogger(ResultPartition.class);
private final String owningTaskName;
private final JobID jobId;
private final ResultPartitionID partitionId;
/** Type of this partition. Defines the concrete subpartition implementation to use. */
private final ResultPartitionType partitionType;
/**
* Flag indicating whether to eagerly deploy consumers.
*
* <p>If <code>true</code>, the consumers are deployed as soon as the
* runtime result is registered at the result manager of the task manager.
*/
private final boolean eagerlyDeployConsumers;
/** The subpartitions of this partition. At least one. */
private final ResultSubpartition[] subpartitions;
private final ResultPartitionManager partitionManager;
private final ResultPartitionConsumableNotifier partitionConsumableNotifier;
// - Runtime state --------------------------------------------------------
private final AtomicBoolean isReleased = new AtomicBoolean();
/**
* The total number of references to subpartitions of this result. The result partition can be
* safely released, iff the reference count is zero. A reference count of -1 denotes that the
* result partition has been released.
*/
private final AtomicInteger pendingReferences = new AtomicInteger();
private BufferPool bufferPool;
private boolean hasNotifiedPipelinedConsumers;
private boolean isFinished;
private volatile Throwable cause;
// - Statistics ----------------------------------------------------------
/** The total number of buffers (both data and event buffers) */
private int totalNumberOfBuffers;
/** The total number of bytes (both data and event buffers) */
private long totalNumberOfBytes;
public ResultPartition(
String owningTaskName,
JobID jobId,
ResultPartitionID partitionId,
ResultPartitionType partitionType,
boolean eagerlyDeployConsumers,
int numberOfSubpartitions,
ResultPartitionManager partitionManager,
ResultPartitionConsumableNotifier partitionConsumableNotifier,
IOManager ioManager,
IOMode defaultIoMode) {
this.owningTaskName = checkNotNull(owningTaskName);
this.jobId = checkNotNull(jobId);
this.partitionId = checkNotNull(partitionId);
this.partitionType = checkNotNull(partitionType);
this.eagerlyDeployConsumers = eagerlyDeployConsumers;
this.subpartitions = new ResultSubpartition[numberOfSubpartitions];
this.partitionManager = checkNotNull(partitionManager);
this.partitionConsumableNotifier = checkNotNull(partitionConsumableNotifier);
// Create the subpartitions.
switch (partitionType) {
case BLOCKING:
for (int i = 0; i < subpartitions.length; i++) {
subpartitions[i] = new SpillableSubpartition(
i, this, ioManager, defaultIoMode);
}
break;
case PIPELINED:
for (int i = 0; i < subpartitions.length; i++) {
subpartitions[i] = new PipelinedSubpartition(i, this);
}
break;
default:
throw new IllegalArgumentException("Unsupported result partition type.");
}
// Initially, partitions should be consumed once before release.
pin();
LOG.debug("{}: Initialized {}", owningTaskName, this);
}
/**
* Registers a buffer pool with this result partition.
* <p>
* There is one pool for each result partition, which is shared by all its sub partitions.
* <p>
* The pool is registered with the partition *after* it as been constructed in order to conform
* to the life-cycle of task registrations in the {@link TaskManager}.
*/
public void registerBufferPool(BufferPool bufferPool) {
checkArgument(bufferPool.getNumberOfRequiredMemorySegments() >= getNumberOfSubpartitions(),
"Bug in result partition setup logic: Buffer pool has not enough guaranteed buffers for this result partition.");
checkState(this.bufferPool == null, "Bug in result partition setup logic: Already registered buffer pool.");
this.bufferPool = checkNotNull(bufferPool);
// If the partition type is back pressure-free, we register with the buffer pool for
// callbacks to release memory.
if (!partitionType.hasBackPressure()) {
bufferPool.setBufferPoolOwner(this);
}
}
public JobID getJobId() {
return jobId;
}
public ResultPartitionID getPartitionId() {
return partitionId;
}
public int getNumberOfSubpartitions() {
return subpartitions.length;
}
/**
* Returns whether consumers should be deployed eagerly (as soon as they
* are registered at the result manager of the task manager).
*
* @return Whether consumers should be deployed eagerly
*/
public boolean getEagerlyDeployConsumers() {
return eagerlyDeployConsumers;
}
public BufferProvider getBufferProvider() {
return bufferPool;
}
public int getTotalNumberOfBuffers() {
return totalNumberOfBuffers;
}
public long getTotalNumberOfBytes() {
return totalNumberOfBytes;
}
// ------------------------------------------------------------------------
/**
* Adds a buffer to the subpartition with the given index.
*
* <p> For PIPELINED results, this will trigger the deployment of consuming tasks after the
* first buffer has been added.
*/
public void add(Buffer buffer, int subpartitionIndex) throws IOException {
boolean success = false;
try {
checkInProduceState();
final ResultSubpartition subpartition = subpartitions[subpartitionIndex];
synchronized (subpartition) {
success = subpartition.add(buffer);
// Update statistics
totalNumberOfBuffers++;
totalNumberOfBytes += buffer.getSize();
}
}
finally {
if (success) {
notifyPipelinedConsumers();
}
else {
buffer.recycle();
}
}
}
/**
* Finishes the result partition.
*
* <p> After this operation, it is not possible to add further data to the result partition.
*
* <p> For BLOCKING results, this will trigger the deployment of consuming tasks.
*/
public void finish() throws IOException {
boolean success = false;
try {
checkInProduceState();
for (ResultSubpartition subpartition : subpartitions) {
synchronized (subpartition) {
subpartition.finish();
}
}
success = true;
}
finally {
if (success) {
isFinished = true;
notifyPipelinedConsumers();
}
}
}
public void release() {
release(null);
}
/**
* Releases the result partition.
*/
public void release(Throwable cause) {
if (isReleased.compareAndSet(false, true)) {
LOG.debug("{}: Releasing {}.", owningTaskName, this);
// Set the error cause
if (cause != null) {
this.cause = cause;
}
// Release all subpartitions
for (ResultSubpartition subpartition : subpartitions) {
try {
synchronized (subpartition) {
subpartition.release();
}
}
// Catch this in order to ensure that release is called on all subpartitions
catch (Throwable t) {
LOG.error("Error during release of result subpartition: " + t.getMessage(), t);
}
}
}
}
public void destroyBufferPool() {
if (bufferPool != null) {
bufferPool.lazyDestroy();
}
}
/**
* Returns the requested subpartition.
*/
public ResultSubpartitionView createSubpartitionView(int index, BufferProvider bufferProvider) throws IOException {
int refCnt = pendingReferences.get();
checkState(refCnt != -1, "Partition released.");
checkState(refCnt > 0, "Partition not pinned.");
checkElementIndex(index, subpartitions.length, "Subpartition not found.");
return subpartitions[index].createReadView(bufferProvider);
}
public Throwable getFailureCause() {
return cause;
}
/**
* Releases buffers held by this result partition.
*
* <p> This is a callback from the buffer pool, which is registered for result partitions, which
* are back pressure-free.
*/
@Override
public void releaseMemory(int toRelease) throws IOException {
checkArgument(toRelease > 0);
for (ResultSubpartition subpartition : subpartitions) {
toRelease -= subpartition.releaseMemory();
// Only release as much memory as needed
if (toRelease <= 0) {
break;
}
}
}
@Override
public String toString() {
return "ResultPartition " + partitionId.toString() + " [" + partitionType + ", "
+ subpartitions.length + " subpartitions, "
+ pendingReferences + " pending references]";
}
// ------------------------------------------------------------------------
/**
* Pins the result partition.
*
* <p> The partition can only be released after each subpartition has been consumed once per pin
* operation.
*/
void pin() {
while (true) {
int refCnt = pendingReferences.get();
if (refCnt >= 0) {
if (pendingReferences.compareAndSet(refCnt, refCnt + subpartitions.length)) {
break;
}
}
else {
throw new IllegalStateException("Released.");
}
}
}
/**
* Notification when a subpartition is released.
*/
void onConsumedSubpartition(int subpartitionIndex) {
if (isReleased.get()) {
return;
}
int refCnt = pendingReferences.decrementAndGet();
if (refCnt == 0) {
partitionManager.onConsumedPartition(this);
}
else if (refCnt < 0) {
throw new IllegalStateException("All references released.");
}
LOG.debug("{}: Received release notification for subpartition {} (reference count now at: {}).",
this, subpartitionIndex, pendingReferences);
}
// ------------------------------------------------------------------------
private void checkInProduceState() {
checkState(!isFinished, "Partition already finished.");
}
/**
* Notifies pipelined consumers of this result partition once.
*/
private void notifyPipelinedConsumers() throws IOException {
if (partitionType.isPipelined() && !hasNotifiedPipelinedConsumers) {
partitionConsumableNotifier.notifyPartitionConsumable(jobId, partitionId);
hasNotifiedPipelinedConsumers = true;
}
}
}