Flink源碼閱讀----04
源碼分析
1.MemorySegment
HeapMemorySegment和HybridMemorySegment都是MemorySegment類的子類
HeapMemorySegment表示由Flink管理的堆內存。該段由字節數組支持,並具有針對基本類型的隨機放置和獲取方法,以及比較和交換方法。此類專門用於堆內存的字節訪問和字節複製調用,同時重用MemorySegment中的多字節類型訪問和跨段操作。注意,通常不應手動分配內存段,而應通過 MemorySegmentFactory進行分配。
HybridMemorySegment表示Flink管理的一塊內存。 內存可以是堆上的,堆外直接的或堆外不安全的,此類是透明處理的。 此類專門用於堆內存的字節訪問和字節複製調用,同時重用MemorySegment中的多字節類型訪問和跨段操作。此類包含HeapMemorySegment的功能,但是對於單個字節的操作效率較低。注意,通常不應手動分配內存段,而應通過 MemorySegmentFactory進行分配。
1.HeapMemorySegment
//heap memory managed
private byte[] memory;
/**
 * Creates a heap memory segment over the given byte array without an owner.
 * Delegates to the two-argument constructor, passing {@code null} as the owner.
 *
 * @param memory the byte array that backs this segment
 */
HeapMemorySegment(byte[] memory) {
this(memory, null);
}
/**
 * Creates a heap memory segment backed by the given byte array.
 *
 * @param memory the byte array that holds the data; must not be {@code null}
 * @param owner the owner that references the data; handed to the superclass constructor
 * @throws NullPointerException if {@code memory} is {@code null}
 */
HeapMemorySegment(byte[] memory, Object owner) {
super(Objects.requireNonNull(memory), owner);
// Keep a direct reference to the array for the byte-level accessors of this class.
this.memory = memory;
}
memory字段是對堆內存(字節數組)的一個額外引用:這樣就可以依靠數組訪問自帶的內置檢查讓非法訪問自動失敗,而無需再做額外檢查。
----------------------------------------------對父類方法的重寫-----------------------------------------------------
// 內存段的操作
/**
 * Frees this memory segment.
 *
 * <p>According to the original note, no further operations are possible on the
 * segment after this call (they will fail), and the actual memory (heap or
 * off-heap) is only released once this segment object has been garbage collected.
 */
@Override
public void free() {
super.free();
// Drop the array reference so that no more data can be placed in this segment
// and later array accesses fail.
this.memory = null;
}
/**
 * Wraps the range starting at {@code offset} and spanning {@code length} bytes
 * of the backing array in a {@link ByteBuffer}.
 *
 * @param offset the offset inside the memory segment
 * @param length the number of bytes to wrap as a buffer
 * @return a byte buffer over the requested range of the backing array
 * @throws IllegalStateException if the backing array reference is {@code null}
 *     (the segment has been freed)
 */
@Override
public ByteBuffer wrap(int offset, int length) {
    final byte[] backing = this.memory;
    // A null array is the only way ByteBuffer.wrap raises a NullPointerException here,
    // so checking for it up front reports the freed state exactly as before.
    if (backing == null) {
        throw new IllegalStateException("segment has been freed");
    }
    return ByteBuffer.wrap(backing, offset, length);
}
/**
 * Returns the byte array that backs this segment.
 *
 * @return the backing array itself (not a copy); {@code null} once {@code free()}
 *     has cleared the reference
 */
public byte[] getArray() {
return this.memory;
}
//get set 方法
/**
 * Reads the byte at the given index of the backing array.
 * No explicit range check is made; the array access itself performs it.
 *
 * @param index the position to read from
 * @return the byte stored at {@code index}
 */
@Override
public final byte get(int index) {
return this.memory[index];
}
/**
 * Writes a single byte at the given index of the backing array.
 * No explicit range check is made; the array access itself performs it.
 *
 * @param index the position to write to
 * @param b the byte to store
 */
@Override
public final void put(int index, byte b) {
this.memory[index] = b;
}
/**
 * Fills the whole destination array with bytes read from this segment,
 * starting at {@code index}. Delegates to the four-argument overload.
 *
 * @param index the position in this segment to start reading from
 * @param dst the array to fill completely
 */
@Override
public final void get(int index, byte[] dst) {
get(index, dst, 0, dst.length);
}
/**
 * Copies the whole source array into this segment, starting at {@code index}.
 * Delegates to the four-argument overload.
 *
 * @param index the position in this segment to start writing at
 * @param src the array whose entire content is copied
 */
@Override
public final void put(int index, byte[] src) {
put(index, src, 0, src.length);
}
/**
 * Copies {@code length} bytes from this segment, starting at {@code index},
 * into {@code dst} starting at {@code offset}.
 *
 * @param index the position in this segment to start reading from
 * @param dst the destination array
 * @param offset the first position in {@code dst} to write to
 * @param length the number of bytes to copy
 */
@Override
public final void get(int index, byte[] dst, int offset, int length) {
// system arraycopy does the boundary checks anyways, no need to check extra
System.arraycopy(this.memory, index, dst, offset, length);
}
/**
 * Copies {@code length} bytes from {@code src}, starting at {@code offset},
 * into this segment starting at {@code index}.
 *
 * @param index the position in this segment to start writing at
 * @param src the source array
 * @param offset the first position in {@code src} to read from
 * @param length the number of bytes to copy
 */
@Override
public final void put(int index, byte[] src, int offset, int length) {
// system arraycopy does the boundary checks anyways, no need to check extra
System.arraycopy(src, offset, this.memory, index, length);
}
/**
 * Reads one byte at the given index and interprets it as a boolean.
 *
 * @param index the position to read from
 * @return {@code true} if the stored byte is non-zero, {@code false} if it is zero
 */
@Override
public final boolean getBoolean(int index) {
    final byte raw = this.memory[index];
    return raw != 0;
}
/**
 * Stores a boolean as a single byte at the given index.
 *
 * @param index the position to write to
 * @param value the boolean to store; written as 1 for {@code true} and 0 for {@code false}
 */
@Override
public final void putBoolean(int index, boolean value) {
    final byte encoded = value ? (byte) 1 : (byte) 0;
    this.memory[index] = encoded;
}
//批量讀寫
/**
 * Bulk read: writes {@code length} bytes of the backing array, starting at
 * {@code offset}, to the given {@link DataOutput}.
 *
 * @param out the output to write to
 * @param offset the position in this segment to start reading from
 * @param length the number of bytes to write
 * @throws IOException if the write to {@code out} fails
 */
@Override
public final void get(DataOutput out, int offset, int length) throws IOException {
out.write(this.memory, offset, length);
}
/**
 * Bulk write: reads exactly {@code length} bytes from the given {@link DataInput}
 * into the backing array, starting at {@code offset}.
 *
 * @param in the input to read from
 * @param offset the position in this segment to start writing at
 * @param length the number of bytes to read
 * @throws IOException if reading from {@code in} fails
 */
@Override
public final void put(DataInput in, int offset, int length) throws IOException {
in.readFully(this.memory, offset, length);
}
/**
 * Bulk get: copies {@code numBytes} bytes from this segment, starting at
 * {@code offset}, into the target {@link ByteBuffer}. The bytes are put into the
 * target starting at its current position.
 *
 * @param offset the position in this segment to start copying from
 * @param target the buffer to copy the bytes into
 * @param numBytes the number of bytes to copy
 * @throws java.nio.BufferOverflowException if more bytes are requested than remain
 *     in the target buffer
 */
@Override
public final void get(int offset, ByteBuffer target, int numBytes) {
// ByteBuffer performs the boundary checks
target.put(this.memory, offset, numBytes);
}
/**
 * Bulk put: copies {@code numBytes} bytes from the source {@link ByteBuffer}
 * (read from its current position) into this segment, starting at {@code offset}.
 *
 * @param offset the position in this segment to start writing at
 * @param source the buffer to copy the bytes from
 * @param numBytes the number of bytes to copy
 */
@Override
public final void put(int offset, ByteBuffer source, int numBytes) {
// ByteBuffer.get(byte[], int, int) fills the backing array directly
source.get(this.memory, offset, numBytes);
}
---------------------------產生堆內存段的內存段工廠。該工廠不支持分配堆外內存--------------------------------------------------
/**
 * Factory that produces heap memory segments. Every method here builds a
 * {@code HeapMemorySegment} on top of a byte array.
 */
public static final class HeapMemorySegmentFactory {
/**
 * Creates a new memory segment that targets the given heap memory region.
 *
 * @param memory the byte array to wrap
 * @return a segment over {@code memory}, created without an owner
 */
public HeapMemorySegment wrap(byte[] memory) {
return new HeapMemorySegment(memory);
}
/**
 * Allocates some unpooled memory (a fresh byte array of the given size) and
 * creates a new memory segment that represents that memory.
 *
 * @param size the size, in bytes, of the array to allocate
 * @param owner the owner to associate with the segment
 * @return a segment over the newly allocated array
 */
public HeapMemorySegment allocateUnpooledSegment(int size, Object owner) {
return new HeapMemorySegment(new byte[size], owner);
}
/**
 * Creates a memory segment that wraps the given byte array.
 *
 * @param memory the byte array to wrap
 * @param owner the owner to associate with the segment
 * @return a segment over {@code memory}
 */
public HeapMemorySegment wrapPooledHeapMemory(byte[] memory, Object owner) {
return new HeapMemorySegment(memory, owner);
}
/**
 * Prevents external instantiation. The constructor is package-private,
 * so the factory can only be created from within its own package.
 */
HeapMemorySegmentFactory() {}
}
2.HybridMemorySegment
//MemorySegment 操作
/**
 * Wraps the range starting at {@code offset} and spanning {@code length} bytes
 * in a {@link ByteBuffer}.
 *
 * <p>When heap memory is present the array is wrapped directly. Otherwise a
 * duplicate of the off-heap buffer is returned with its limit and position
 * set to the requested range.
 *
 * @param offset the offset inside the memory segment
 * @param length the number of bytes to wrap
 * @return a byte buffer over the requested range
 * @throws IllegalStateException if {@code address > addressLimit} (segment freed)
 * @throws IndexOutOfBoundsException if the off-heap duplicate rejects the range
 */
@Override
public ByteBuffer wrap(int offset, int length) {
    if (address > addressLimit) {
        throw new IllegalStateException("segment has been freed");
    }
    if (heapMemory != null) {
        return ByteBuffer.wrap(heapMemory, offset, length);
    }
    try {
        final ByteBuffer view = offHeapBuffer.duplicate();
        // Set the limit before the position so the position is validated against the new limit.
        view.limit(offset + length);
        view.position(offset);
        return view;
    } catch (IllegalArgumentException e) {
        throw new IndexOutOfBoundsException();
    }
}
//offHeapBuffer.duplicate方法創建一個共享該緩衝區內容的新字節緩衝區。新緩衝區的內容將是此緩衝區的內容。對該緩衝區內容的更改在新緩衝區中可見,反之亦然。兩個緩衝區的位置,限制和標記值將是獨立的。新緩衝區的容量,限制,位置和標記值將與此緩衝區的值相同。當且僅當該緩衝區是直接緩衝區時,新緩衝區纔是直接緩衝區;當且僅當該緩衝區是隻讀緩衝區時,它纔是只讀緩衝區。
//get() and put() methods
/**
 * Reads the byte at the given index through {@code UNSAFE}.
 *
 * @param index the position to read from
 * @return the byte at {@code index}
 * @throws IllegalStateException if the access is rejected and
 *     {@code address > addressLimit} (segment freed)
 * @throws IndexOutOfBoundsException if the index is negative or not below the
 *     address limit
 */
@Override
public final byte get(int index) {
    final long pos = address + index;
    final boolean inRange = index >= 0 && pos < addressLimit;
    if (inRange) {
        return UNSAFE.getByte(heapMemory, pos);
    }
    if (address > addressLimit) {
        throw new IllegalStateException("segment has been freed");
    }
    // index is in fact invalid
    throw new IndexOutOfBoundsException();
}
// 調用Unsafe方法來獲取本機內存
/**
* The unsafe handle for transparent memory copied (heap / off-heap).
*/
@SuppressWarnings("restriction")
protected static final sun.misc.Unsafe UNSAFE = MemoryUtils.UNSAFE;
=================>
@SuppressWarnings({"restriction", "UseOfSunClasses"})
public static final sun.misc.Unsafe UNSAFE = getUnsafe();
//如果內存段已被釋放(address > addressLimit),拋出IllegalStateException(即下面這個分支);如果索引爲負數,或者大於或等於內存段的大小,則拋出IndexOutOfBoundsException
else if (address > addressLimit) {
throw new IllegalStateException("segment has been freed");
}
/**
 * Bulk read: copies {@code length} bytes from this segment, starting at
 * {@code index}, into {@code dst} starting at {@code offset}.
 *
 * @param index the position in this segment to start reading from
 * @param dst the destination array
 * @param offset the first position in {@code dst} to write to
 * @param length the number of bytes to copy
 * @throws IndexOutOfBoundsException if the destination range is invalid, or if the
 *     index is negative or leaves fewer than {@code length} bytes before the limit
 * @throws IllegalStateException if the access is rejected and
 *     {@code address > addressLimit} (segment freed)
 */
@Override
public final void get(int index, byte[] dst, int offset, int length) {
    // check the byte array offset and length and the status:
    // any negative term sets the sign bit of the OR-ed value
    if ((offset | length | (offset + length) | (dst.length - (offset + length))) < 0) {
        throw new IndexOutOfBoundsException();
    }

    final long pos = address + index;
    final boolean inRange = index >= 0 && pos <= addressLimit - length;
    if (inRange) {
        final long arrayAddress = BYTE_ARRAY_BASE_OFFSET + offset;
        UNSAFE.copyMemory(heapMemory, pos, dst, arrayAddress, length);
        return;
    }
    if (address > addressLimit) {
        throw new IllegalStateException("segment has been freed");
    }
    // index is in fact invalid
    throw new IndexOutOfBoundsException();
}
//如果索引爲負數,或者太大而導致請求的字節數超過索引和內存段末尾之間的內存量,則拋出該異常。
if ((offset | length | (offset + length) | (dst.length - (offset + length))) < 0) {
throw new IndexOutOfBoundsException();
}
//addressLimit 最後一個可尋址字節之後一個字節的地址,即address + size
//dst 將內存複製到的那個內存
-------------------------------------------------------------------批量讀寫---------------------------------------------------------------------------
/**
 * Bulk read: writes {@code length} bytes of this segment, starting at
 * {@code offset}, to the given {@link DataOutput}.
 *
 * <p>With heap memory the array range is written in one call. Otherwise the data
 * is written as big-endian longs, 8 bytes at a time, followed by single bytes
 * for the remainder.
 *
 * @param out the output to write to
 * @param offset the position in this segment to start reading from
 * @param length the number of bytes to write
 * @throws IOException if writing to {@code out} fails
 * @throws IllegalStateException if {@code address > addressLimit} (segment freed)
 */
@Override
public final void get(DataOutput out, int offset, int length) throws IOException {
    if (address > addressLimit) {
        throw new IllegalStateException("segment has been freed");
    }
    if (heapMemory != null) {
        out.write(heapMemory, offset, length);
        return;
    }

    int pos = offset;
    int remaining = length;
    // whole 8-byte words first
    for (; remaining >= 8; pos += 8, remaining -= 8) {
        out.writeLong(getLongBigEndian(pos));
    }
    // then the trailing bytes (fewer than 8)
    for (; remaining > 0; pos++, remaining--) {
        out.writeByte(get(pos));
    }
}
out.writeLong(getLongBigEndian(offset));
//out.writeLong 將由8個字節組成的long值寫入字節流,寫入的字節流按照下面的格式寫入
* (byte)(0xff & (v >> 56))
* (byte)(0xff & (v >> 48))
* (byte)(0xff & (v >> 40))
* (byte)(0xff & (v >> 32))
* (byte)(0xff & (v >> 24))
* (byte)(0xff & (v >> 16))
* (byte)(0xff & (v >> 8))
* (byte)(0xff & v)
//可以通過接口DataInput的readLong方法讀取此方法寫入的字節,然後該方法將返回等於v的long值。
out.writeByte(get(offset));
//out.writeByte 將參數v的八個低位寫入輸出流。v的24個高位被忽略。(這意味着對於整數參數,writeByte 的作用與 write 完全相同。)用此方法寫入的字節可由接口DataInput的readByte方法讀取,然後將返回一個byte等於v。
//putLongBigEndian(下面的put方法會用到):以big endian字節順序將給定的long值(64bit,8個字節)寫入給定位置。此方法的速度取決於系統的本機字節順序,並且可能比putLong(int,long)慢。
//在大多數情況下(例如內存中的臨時存儲或I/O和網絡的序列化),知道寫入值的字節順序與讀取值的字節順序相同,putLong(int,long)方法是首選.
/**
 * Bulk write: reads {@code length} bytes from the given {@link DataInput} into
 * this segment, starting at {@code offset}.
 *
 * <p>With heap memory the array range is filled in one call. Otherwise the data
 * is read as longs and stored big-endian, 8 bytes at a time, followed by single
 * bytes for the remainder.
 *
 * @param in the input to read from
 * @param offset the position in this segment to start writing at
 * @param length the number of bytes to read
 * @throws IOException if reading from {@code in} fails
 * @throws IllegalStateException if {@code address > addressLimit} (segment freed)
 */
@Override
public final void put(DataInput in, int offset, int length) throws IOException {
    if (address > addressLimit) {
        throw new IllegalStateException("segment has been freed");
    }
    if (heapMemory != null) {
        in.readFully(heapMemory, offset, length);
        return;
    }

    int pos = offset;
    int remaining = length;
    // whole 8-byte words first
    for (; remaining >= 8; pos += 8, remaining -= 8) {
        putLongBigEndian(pos, in.readLong());
    }
    // then the trailing bytes (fewer than 8)
    for (; remaining > 0; pos++, remaining--) {
        put(pos, in.readByte());
    }
}
字節順序由來
在寫字符流時,因爲字符型只佔一個字節數,計算機只須按一個字符一個字符寫入文件即可。
但是如果是處理整型時,由於整型佔4個字節,所以一個整型內部的字節存儲排列的順序直接關係到被計算機識別出來的整型值.
某種意義上也可直接理解計算機的識別順序就是所謂的字節順序。
Big-Endian可以理解爲:地址越低的字節,存放的是數據中越高位(越重要)的字節;Little-Endian則相反。
Big-Endian 最重要的字節在整個內容的左端。
Little-Endian 最重要的字節在整個內容的右端。
低地址 高地址
高位字節 低位字節
----------------------------------------->
+----+----+----+----+
| 12 | 34 | 56 | 78 |
+----+----+----+----+
//Big-Endian:在大端序CPU的Unix機器上運行(字節序取決於CPU架構,而非操作系統)
#include <stdio.h>

/*
 * Prints the address and value of an int, then walks its bytes from the lowest
 * address upwards and prints each one as a character. The order in which
 * 'A'..'D' (0x41..0x44) appear shows the byte order of the machine.
 * Note: the textual form of %p is implementation-defined (it may carry a 0x prefix).
 */
int main(void)
{
    int i = 0x41424344;
    unsigned char *pAddress = (unsigned char *)&i;
    size_t j;

    printf("int Address:%p Value:%x\n", (void *)&i, (unsigned int)i);
    printf("-------------------------------\n");
    /* visit every byte of the int, lowest address first */
    for (j = 0; j < sizeof i; j++)
    {
        printf("char Address:%p Value:%c\n", (void *)pAddress, *pAddress);
        pAddress++;
    }
    return 0;
}
編譯輸出:
int Address:7f7f08f0 Value:41424344
-------------------------------
char Address:7f7f08f0 Value:A
char Address:7f7f08f1 Value:B
char Address:7f7f08f2 Value:C
char Address:7f7f08f3 Value:D
//Little-Endian:在小端序CPU(如x86)的Windows環境運行(字節序取決於CPU架構,而非操作系統)
#include <stdio.h>

/*
 * Prints the address and value of an int, then walks its bytes from the lowest
 * address upwards and prints each one as a character. The order in which
 * 'A'..'D' (0x41..0x44) appear shows the byte order of the machine.
 * Note: the textual form of %p is implementation-defined (it may carry a 0x prefix).
 */
int main(void)
{
    int i = 0x41424344;
    unsigned char *pAddress = (unsigned char *)&i;
    size_t j;

    printf("int Address:%p Value:%x\n", (void *)&i, (unsigned int)i);
    printf("-------------------------------\n");
    /* visit every byte of the int, lowest address first */
    for (j = 0; j < sizeof i; j++)
    {
        printf("char Address:%p Value:%c\n", (void *)pAddress, *pAddress);
        pAddress++;
    }
    return 0;
}
編譯輸出:
int Address:12ff7c Value:41424344
-------------------------------
char Address:12ff7c Value:D
char Address:12ff7d Value:C
char Address:12ff7e Value:B
char Address:12ff7f Value:A
2.DataOutputView
[外鏈圖片轉存失敗,源站可能有防盜鏈機制,建議將圖片保存下來直接上傳(img-OeWcnwvQ-1585101544303)(https://s1.ax1x.com/2020/03/24/8qy8hj.png)]
1.DataOutputViewStreamWrapper
將OutputStream轉換爲DataOutputView的實用程序類
/**
 * Skips {@code numBytes} bytes of output by writing that many bytes from a
 * lazily allocated 4096-byte scratch buffer, in chunks of at most the buffer size.
 * Does nothing when {@code numBytes} is not positive.
 *
 * @param numBytes the number of bytes to skip
 * @throws IOException if an I/O problem prevents the view from advancing
 */
@Override
public void skipBytesToWrite(int numBytes) throws IOException {
    if (tempBuffer == null) {
        tempBuffer = new byte[4096];
    }

    for (int remaining = numBytes; remaining > 0; ) {
        final int chunk = Math.min(remaining, tempBuffer.length);
        write(tempBuffer, 0, chunk);
        remaining -= chunk;
    }
}
2.SeekableDataOutputView
將DataOutputView標記爲可定位(seekable)的接口。可定位的視圖可以設置其寫入的位置。
3.DataOutputSerializer
DataOutput接口的簡單高效的序列化程序。
/**
 * Returns a reference to the internal byte buffer (no copy is made).
 *
 * <p>Per the original note: the buffer may be larger than the serialized data,
 * only the bytes from zero to {@code length()} are valid, and the buffer is
 * overwritten by subsequent write calls.
 *
 * @return the internal buffer
 */
public byte[] getSharedBuffer() {
return buffer;
}
/**
 * Returns a copy of the buffer whose length is the current write position,
 * i.e. the data serialized so far.
 *
 * <p>The returned array is an exclusive copy, so it can be used safely without
 * being overwritten by later write calls to this serializer.
 *
 * @return a new array holding the first {@code position} bytes of the buffer
 */
public byte[] getCopyOfBuffer() {
return Arrays.copyOf(buffer, position);
}
/**
 * Writes the eight low-order bits of {@code b} at the current position and
 * advances the position by one. The 24 high-order bits of {@code b} are ignored.
 * When the position has reached the end of the buffer, {@code resize(1)} is
 * called first.
 *
 * @param b the value whose low byte is written
 * @throws IOException declared by the interface; see {@code resize} for the cause
 */
@Override
public void write(int b) throws IOException {
    final boolean bufferExhausted = position >= buffer.length;
    if (bufferExhausted) {
        resize(1);
    }
    // the narrowing cast keeps exactly the low 8 bits
    buffer[position++] = (byte) b;
}
/**
 * Writes {@code len} bytes from {@code b}, in order, starting at {@code b[off]}
 * and ending with {@code b[off + len - 1]}. Nothing is written when {@code len}
 * is zero. The buffer is resized via {@code resize(len)} when the remaining
 * capacity is too small.
 *
 * @param b the source array
 * @param off the first position in {@code b} to read from
 * @param len the number of bytes to write
 * @throws NullPointerException if {@code b} is {@code null}
 * @throws ArrayIndexOutOfBoundsException if {@code off} or {@code len} is negative,
 *     or {@code off + len} exceeds the length of {@code b}
 * @throws IOException declared by the interface; see {@code resize} for the cause
 */
@Override
public void write(byte[] b, int off, int len) throws IOException {
    // Validate the whole source range before touching the buffer. "off > b.length - len"
    // is the overflow-safe form of "off + len > b.length". A negative offset is rejected
    // here too, so an invalid call no longer resizes the buffer before the copy fails.
    if (len < 0 || off > b.length - len || off < 0) {
        throw new ArrayIndexOutOfBoundsException();
    }
    if (this.position > this.buffer.length - len) {
        resize(len);
    }
    System.arraycopy(b, off, this.buffer, this.position, len);
    this.position += len;
}
3.數據流轉過程
[外鏈圖片轉存失敗,源站可能有防盜鏈機制,建議將圖片保存下來直接上傳(img-yWdFcwn0-1585101544303)(https://s1.ax1x.com/2020/03/17/8UUCEn.png)]
1.數據在operator中處理
數據在operator中進行處理後,我們經常會調用out.collect(…),這類方法將數據發送到下游,而這個方法,會將數據封裝成StreamRecord,內部包含了時間戳等信息。
/** The actual value held by this record. */
private T value;
/** The timestamp of the record. */
private long timestamp;
/** Flag whether the timestamp is actually set. */
private boolean hasTimestamp;
2.數據提交給RecordWriter處理分發
RecordWriter負責將數據寫入ResultPartition中去。提交給RecordWriter很簡單,就是以下代碼,在RecordWriterOutput類中。
/**
 * Emits a record to the main output. The record is forwarded to the record
 * writer only when this output has no output tag; otherwise the call is a no-op.
 *
 * @param record the record to emit
 */
@Override
public void collect(StreamRecord<OUT> record) {
    // With an output tag set, this output is not responsible for the main output.
    if (this.outputTag == null) {
        pushToRecordWriter(record);
    }
}
/**
 * Emits a record to a side output. The record is forwarded to the record writer
 * only when this output has an output tag equal to the given one; otherwise the
 * call is a no-op.
 *
 * @param outputTag the tag identifying the side output
 * @param record the record to emit
 */
@Override
public <X> void collect(OutputTag<X> outputTag, StreamRecord<X> record) {
    // Only the output whose tag matches is responsible for this side output.
    final boolean responsible = this.outputTag != null && this.outputTag.equals(outputTag);
    if (responsible) {
        pushToRecordWriter(record);
    }
}
private <X> void pushToRecordWriter(StreamRecord<X> record) {
serializationDelegate.setInstance(record);
try {
recordWriter.emit(serializationDelegate);
}
catch (Exception e) {
throw new RuntimeException(e.getMessage(), e);
}
3.RecordWriter處理數據
在recordWriter初始化的時候,默認會開啓一個守護線程,定時的去flush一下通道里面的數據。
//RecordWriter
/**
 * Creates a record writer on top of the given partition writer.
 *
 * <p>A timeout of 0 turns on flushing after every emit ({@code flushAlways}).
 * For a timeout of -1 or 0 no output flusher is created. For any other accepted
 * value an {@code OutputFlusher} is created with that timeout and started.
 *
 * @param writer the partition writer that receives the data
 * @param timeout the flush timeout; must be {@code >= -1}
 * @param taskName optional task name appended to the flusher thread name, may be {@code null}
 */
RecordWriter(ResultPartitionWriter writer, long timeout, String taskName) {
    this.targetPartition = writer;
    this.numberOfChannels = writer.getNumberOfSubpartitions();
    this.serializer = new SpanningRecordSerializer<T>();

    checkArgument(timeout >= -1);
    this.flushAlways = (timeout == 0);

    if (timeout != -1 && timeout != 0) {
        String threadName = DEFAULT_OUTPUT_FLUSH_THREAD_NAME;
        if (taskName != null) {
            threadName = DEFAULT_OUTPUT_FLUSH_THREAD_NAME + " for " + taskName;
        }
        // start the flusher (presumably a background thread that flushes periodically)
        outputFlusher = new OutputFlusher(threadName, timeout);
        outputFlusher.start();
    } else {
        outputFlusher = null;
    }
}
recordWriter接收數據,並序列化寫入channel的是在emit方法。
/**
 * Serializes the record and copies the serialized bytes to the target channel.
 *
 * @param record the record to emit
 * @param targetChannel the index of the channel to write to
 * @throws IOException if serialization or copying fails
 * @throws InterruptedException if the copy is interrupted
 */
protected void emit(T record, int targetChannel) throws IOException, InterruptedException {
    checkErroneous();

    // serialize the record into the serializer's intermediate buffer
    serializer.serializeRecord(record);

    // Make sure we don't hold onto the large intermediate serialization buffer for too long
    final boolean shouldPrune = copyFromSerializerToTargetChannel(targetChannel);
    if (shouldPrune) {
        serializer.prune();
    }
}
先看序列化的方法,它會把數據寫入java.nio.ByteBuffer中去,下面的dataBuffer 就是java.nio.ByteBuffer的實例。
//SpanningRecordSerializer
/**
 * Serializes the record into the serialization buffer as a 4-byte length
 * followed by the record data, then exposes the result as {@code dataBuffer}.
 *
 * @param record the record to serialize
 * @throws IOException if writing the record fails
 * @throws IllegalStateException if checks are enabled and the previous record's
 *     data has not been fully consumed
 */
public void serializeRecord(T record) throws IOException {
    if (CHECKED && dataBuffer.hasRemaining()) {
        throw new IllegalStateException("Pending serialization of previous record.");
    }

    serializationBuffer.clear();
    // the initial capacity of the serialization buffer should be no less than 4;
    // these 4 bytes are reserved for the length written below
    serializationBuffer.skipBytesToWrite(4);

    // The record writes itself here, so whatever serialization it implements
    // ends up in serializationBuffer.
    record.write(serializationBuffer);

    // go back to the start, fill in the length, then move past the data again
    final int payloadLength = serializationBuffer.length() - 4;
    serializationBuffer.setPosition(0);
    serializationBuffer.writeInt(payloadLength);
    serializationBuffer.skipBytesToWrite(payloadLength);

    dataBuffer = serializationBuffer.wrapAsByteBuffer();
}
emit會調用copyFromSerializerToTargetChannel方法,這裏會將數據寫入對應的channel中去,channel中通過BufferBuilder去接收數據,而BufferBuilder中就封裝了Flink內部著名的MemorySegment。
/**
 * Copies the serialized record from the serializer into buffer builders of the
 * target channel, requesting new builders until the record is fully written.
 *
 * @param targetChannel the index of the channel to copy to
 * @return {@code true} if the copy ended on a full buffer that also completed
 *     the record, which tells the caller to prune the serializer
 * @throws IOException if copying or flushing fails
 * @throws InterruptedException if requesting a buffer is interrupted
 */
protected boolean copyFromSerializerToTargetChannel(int targetChannel) throws IOException, InterruptedException {
    // We should reset the initial position of the intermediate serialization buffer before
    // copying, so the serialization results can be copied to multiple target buffers.
    serializer.reset();

    boolean pruned = false;
    BufferBuilder builder = getBufferBuilder(targetChannel);
    SerializationResult outcome = serializer.copyToBufferBuilder(builder);

    // Loop while the last copy filled the target buffer:
    //  - buffer full and record complete: finish the buffer, clear the channel's
    //    current builder, flag pruning and stop;
    //  - buffer full but record incomplete: finish the buffer, request a new
    //    builder and keep copying.
    // If the record fit without filling the buffer, the loop is never entered.
    while (outcome.isFullBuffer()) {
        finishBufferBuilder(builder);

        // If this was a full record, we are done. Not breaking out of the loop at this point
        // will lead to another buffer request before breaking out (that would not be a
        // problem per se, but it can lead to stalls in the pipeline).
        if (outcome.isFullRecord()) {
            pruned = true;
            emptyCurrentBufferBuilder(targetChannel);
            break;
        }

        builder = requestNewBufferBuilder(targetChannel);
        outcome = serializer.copyToBufferBuilder(builder);
    }
    checkState(!serializer.hasSerializedData(), "All data should be written at once");

    if (flushAlways) {
        flushTargetPartition(targetChannel);
    }
    return pruned;
}
申請內存
//requestNewBufferBuilder
// @Override
/**
 * Requests a new buffer builder from the target partition, registers its
 * consumer for the channel, and records it as the channel's current builder.
 *
 * @param targetChannel the index of the channel that needs a new builder
 * @return the newly requested buffer builder
 * @throws IOException if requesting the builder or adding the consumer fails
 * @throws InterruptedException if the request is interrupted
 */
public BufferBuilder requestNewBufferBuilder(int targetChannel) throws IOException, InterruptedException {
    // the channel must have no builder yet, or one that is already finished
    checkState(bufferBuilders[targetChannel] == null || bufferBuilders[targetChannel].isFinished());

    final BufferBuilder fresh = targetPartition.getBufferBuilder();
    targetPartition.addBufferConsumer(fresh.createBufferConsumer(), targetChannel);
    bufferBuilders[targetChannel] = fresh;
    return fresh;
}
/**
 * Checks that the partition is in the produce state, then requests a buffer
 * builder from the buffer pool via its blocking request method.
 *
 * @return the buffer builder obtained from the buffer pool
 * @throws IOException if the request fails
 * @throws InterruptedException if the request is interrupted
 */
@Override
public BufferBuilder getBufferBuilder() throws IOException, InterruptedException {
checkInProduceState();
return bufferPool.requestBufferBuilderBlocking();
}
/**
 * Requests a memory segment via the blocking request method and wraps it in a
 * buffer builder.
 *
 * @return a buffer builder over the obtained memory segment
 * @throws IOException if the request fails
 * @throws InterruptedException if the request is interrupted
 */
@Override
public BufferBuilder requestBufferBuilderBlocking() throws IOException, InterruptedException {
return toBufferBuilder(requestMemorySegmentBlocking());
}
/**
 * Wraps a memory segment in a buffer builder that is tied to this pool.
 *
 * @param memorySegment the segment to wrap, may be {@code null}
 * @return a new buffer builder, or {@code null} if the segment is {@code null}
 */
private BufferBuilder toBufferBuilder(MemorySegment memorySegment) {
    return memorySegment == null ? null : new BufferBuilder(memorySegment, this);
}
//本地BufferPool
/**
 * Requests a memory segment, waiting on the availability future and retrying
 * for as long as the request returns {@code null}.
 *
 * @return a non-null memory segment
 * @throws InterruptedException if waiting on the availability future is interrupted
 * @throws IOException if requesting a segment fails
 */
private MemorySegment requestMemorySegmentBlocking() throws InterruptedException, IOException {
    MemorySegment segment = requestMemorySegment();
    while (segment == null) {
        try {
            // wait until available
            isAvailable().get();
        } catch (ExecutionException e) {
            LOG.error("The available future is completed exceptionally.", e);
            ExceptionUtils.rethrow(e);
        }
        segment = requestMemorySegment();
    }
    return segment;
}
數據從數據序列化緩衝區(ByteBuffer)寫入BufferBuilder中
/**
 * Appends the pending serialized data ({@code dataBuffer}) to the target buffer
 * builder, commits it, and reports how the copy ended.
 *
 * @param targetBuffer the buffer builder to copy into
 * @return the serialization result computed by {@code getSerializationResult}
 */
@Override
public SerializationResult copyToBufferBuilder(BufferBuilder targetBuffer) {
targetBuffer.append(dataBuffer);
targetBuffer.commit();
// Work out whether the record was fully copied and whether the target is full.
return getSerializationResult(targetBuffer);
}
/**
 * Classifies the outcome of the last copy into the target buffer.
 *
 * @param targetBuffer the buffer builder that was just written to
 * @return {@code PARTIAL_RECORD_MEMORY_SEGMENT_FULL} if {@code dataBuffer} still
 *     holds uncopied bytes; otherwise {@code FULL_RECORD_MEMORY_SEGMENT_FULL} if
 *     the target is full, or {@code FULL_RECORD} if it is not
 */
private SerializationResult getSerializationResult(BufferBuilder targetBuffer) {
    // Bytes left in dataBuffer mean the record was only partially copied.
    if (dataBuffer.hasRemaining()) {
        return SerializationResult.PARTIAL_RECORD_MEMORY_SEGMENT_FULL;
    }
    // The record was copied completely; report whether the target also filled up.
    if (targetBuffer.isFull()) {
        return SerializationResult.FULL_RECORD_MEMORY_SEGMENT_FULL;
    }
    return SerializationResult.FULL_RECORD;
}
flush操作,會觸發 ResultPartition的flush方法,然後觸發ResultSubpartition的flush方法。其中ResultSubpartition有兩個實現類:BoundedBlockingSubpartition對應有界數據集,而PipelinedSubpartition對應有界和無界數據集。
//PipelinedSubpartition 最後會通知reader有數據可用,reader再向Netty的executor提交一個任務(而不是新開線程),通過觸發用戶事件把數據寫出去
/**
 * Signals that data is available by telling the request queue that this
 * reader is non-empty.
 */
@Override
public void notifyDataAvailable() {
requestQueue.notifyReaderNonEmpty(this);
}
/**
 * Submits a task to the context's executor that fires a user event carrying
 * the given reader into the pipeline.
 *
 * @param reader the reader that has data available
 */
void notifyReaderNonEmpty(final NetworkSequenceViewReader reader) {
    // The notification might come from the same thread. For the initial writes this
    // might happen before the reader has set its reference to the view, because
    // creating the queue and the initial notification happen in the same method call.
    // This can be resolved by separating the creation of the view and allowing
    // notifications.

    // TODO This could potentially have a bad performance impact as in the
    // worst case (network consumes faster than the producer) each buffer
    // will trigger a separate event loop task being scheduled.
    final Runnable fireEvent = () -> ctx.pipeline().fireUserEventTriggered(reader);
    ctx.executor().execute(fireEvent);
}