這一節以實例分析的方式繼續探索二進制創建的內部機制。
爲了驗證上一節的內容,首先在erl_bits.c中的erts_bs_append函數裏加入一些調試輸出。
/*
 * Debug helper: dump the first `len` bytes of `data` to stderr in the form
 *   <title> {b0,b1,...} (len:<len>)
 * followed by a newline.
 *
 * title - label printed in front of the byte list
 * data  - buffer to dump; it is only read
 * len   - number of bytes to print; zero or a negative value prints an
 *         empty list
 */
void print_bin(char *title, unsigned char *data, int len)
{
    int index;

    erts_fprintf(stderr, "%s {", title);
    /*
     * The index is deliberately a signed int: comparing an unsigned index
     * against a negative `len` would convert `len` to a huge unsigned value
     * and the loop would read far beyond the end of `data`.
     */
    for (index = 0; index < len; index++) {
        if (index) {
            erts_fprintf(stderr, ",%u", data[index]);
        } else {
            erts_fprintf(stderr, "%u", data[index]);
        }
    }
    erts_fprintf(stderr, "} (len:%d)\n", len);
}
/*
 * Prepare to append data to the binary held in reg[live], with debug
 * tracing written to stderr.
 *
 * c_p             - the process whose heap receives the new terms
 * reg             - register array; reg[live] holds the binary to append to
 * live            - index of the given binary in reg; live+1 is passed to
 *                   the garbage collector as the register count
 * build_size_term - term holding the number of bits about to be appended
 * extra_words     - additional heap words included in the heap check
 * unit            - when > 1, the current size in bits of the given binary
 *                   must be a multiple of this value
 *
 * On success the function returns a new writable sub binary whose size
 * already includes the bits about to be built, with erts_current_bin set to
 * the destination buffer and erts_bin_offset set to the old size in bits.
 * On failure it returns THE_NON_VALUE after storing a reason in
 * c_p->freason.
 *
 * Two paths exist: if the given binary is a writable sub binary over a
 * writable ProcBin, the existing off-heap buffer is extended in place;
 * otherwise (label not_writable) a new buffer is allocated and the old
 * contents are copied into it.
 */
Eterm
erts_bs_append(Process* c_p, Eterm* reg, Uint live, Eterm build_size_term,
Uint extra_words, Uint unit)
{
Eterm bin; /* Given binary */
Eterm* ptr;
Eterm hdr;
ErlSubBin* sb;
ProcBin* pb;
Binary* binp;
Uint heap_need;
Uint build_size_in_bits;
Uint used_size_in_bits;
Uint unsigned_bits;
// presumably sets up the state behind erts_bin_offset / erts_current_bin
// -- TODO confirm against the macro definition
ERL_BITS_DEFINE_STATEP(c_p);
// Number of bits of the data to be built: build_size_in_bits
if (is_small(build_size_term)) {
Sint signed_bits = signed_val(build_size_term);
if (signed_bits < 0) {
goto badarg;
}
build_size_in_bits = (Uint) signed_bits;
} else if (term_to_Uint(build_size_term, &unsigned_bits)) {
build_size_in_bits = unsigned_bits;
} else {
// NOTE(review): relies on term_to_Uint leaving a failure reason in
// unsigned_bits when it returns false -- confirm
c_p->freason = unsigned_bits;
return THE_NON_VALUE;
}
// Debug output
// NOTE(review): %d is used with a Uint argument here (and %X / %ld with
// Eterm / Uint arguments further down) -- confirm these match what
// erts_fprintf expects on the target platform
erts_fprintf(stderr,"*** append start *** "
"build_size_in_bits:%d, heap_top:%p\n",
build_size_in_bits, c_p->htop);
bin = reg[live];
if (!is_boxed(bin)) {
badarg:
c_p->freason = BADARG;
return THE_NON_VALUE;
}
ptr = boxed_val(bin);
// Fetch the header word of the binary
hdr = *ptr;
if (!is_binary_header(hdr)) {
goto badarg;
}
// #MARK_A
if (hdr != HEADER_SUB_BIN) {
// Not a sub binary, therefore not writable
erts_fprintf(stderr, "not_sub_bin, not_writable, header:%X\n", hdr);
// Disabled alternative that would report heap binaries separately:
// if((hdr & _HEADER_SUBTAG_MASK) == HEAP_BINARY_SUBTAG){
// erts_fprintf(stderr, "be_heap_bin, not_writable\n", hdr);
// }else{
// erts_fprintf(stderr, "not_sub_bin, not_writable, header:%X\n", hdr);
// }
goto not_writable;
}
sb = (ErlSubBin *) ptr;
if (!sb->is_writable) {
// is_writable == 0, therefore not writable
erts_fprintf(stderr, "not_writable is_writable==0\n");
goto not_writable;
}
pb = (ProcBin *) boxed_val(sb->orig);
// NOTE(review): print_bin takes unsigned char *, but the argument is cast
// to char * (same at the later "new pb->bytes:" call) -- confirm
print_bin("pb->bytes:", (char *)pb->bytes, pb->size);
// The original must be a refc binary (ProcBin)
ASSERT(pb->thing_word == HEADER_PROC_BIN);
if ((pb->flags & PB_IS_WRITABLE) == 0) {
// The ProcBin is flagged as not writable
erts_fprintf(stderr, "not_writable (pb->flags & PB_IS_WRITABLE) == 0\n");
goto not_writable;
}
/*
* OK, the binary is writable.
*/
// Debug output
erts_fprintf(stderr, "writable\n");
erts_bin_offset = 8*sb->size + sb->bitsize;
if (unit > 1) {
if ((unit == 8 && (erts_bin_offset & 7) != 0) ||
(erts_bin_offset % unit) != 0) {
goto badarg;
}
}
used_size_in_bits = erts_bin_offset + build_size_in_bits;
// Mark the old sub binary as not writable, because the space following
// its data is about to be written
// #MARK_B
sb->is_writable = 0; /* Make sure that no one else can write. */
erts_fprintf(stderr, "pb->size: from %ld extend to %ld\n", pb->size, NBYTES(used_size_in_bits));
// Extend the ProcBin to the required size
pb->size = NBYTES(used_size_in_bits);
pb->flags |= PB_ACTIVE_WRITER;
/*
* Reallocate the binary if it is too small.
*/
binp = pb->val;
// If the container is too small, reallocate it to twice the required size
if (binp->orig_size < pb->size) {
Uint new_size = 2*pb->size;
binp = erts_bin_realloc(binp, new_size);
binp->orig_size = new_size;
// Note: the reallocation may change the pointer stored in pb->val,
// so this binary must not be referenced from anywhere else
// #MARK_C
pb->val = binp;
pb->bytes = (byte *) binp->orig_bytes;
}
erts_current_bin = pb->bytes;
// Debug output
// NOTE(review): binp->refc is passed straight to a %ld conversion --
// confirm its type is a plain integer in this build
erts_fprintf(stderr, "Binary Size:%ld, Binary Refc:%ld\n", binp->orig_size, binp->refc);
print_bin("new pb->bytes:", (char *)pb->bytes, pb->size);
/*
* Allocate heap space and build a new sub binary.
*/
// Park the ProcBin term in reg[live]; it is read back after the possible
// garbage collection below (presumably so the collector keeps it current)
reg[live] = sb->orig;
heap_need = ERL_SUB_BIN_SIZE + extra_words;
if (c_p->stop - c_p->htop < heap_need) {
(void) erts_garbage_collect(c_p, heap_need, reg, live+1);
}
// Create a new sub binary that starts at the beginning of the original
// binary; compared with the old sub binary only the size is extended to
// the required value
sb = (ErlSubBin *) c_p->htop; // written at the heap top
// Advance the heap top by ERL_SUB_BIN_SIZE (htop is an Eterm pointer, so
// the step is counted in Eterm words)
c_p->htop += ERL_SUB_BIN_SIZE;
sb->thing_word = HEADER_SUB_BIN;
sb->size = BYTE_OFFSET(used_size_in_bits);
sb->bitsize = BIT_OFFSET(used_size_in_bits);
sb->offs = 0;
sb->bitoffs = 0;
// The newest sub binary is writable; in other words, in a series of
// append operations only the last sub binary is writable
sb->is_writable = 1;
sb->orig = reg[live];
erts_fprintf(stderr, "--- new_sub_binary_ok --- new_heap_top:%p\n\n", c_p->htop);
return make_binary(sb);
/*
* The binary is not writable. We must create a new writable binary and
* copy the old contents of the binary.
*/
not_writable:
{
Uint used_size_in_bytes; /* Size of old binary + data to be built */
Uint bin_size;
Binary* bptr;
byte* src_bytes;
Uint bitoffs;
Uint bitsize;
Eterm* hp;
/*
* Allocate heap space.
*/
heap_need = PROC_BIN_SIZE + ERL_SUB_BIN_SIZE + extra_words;
if (c_p->stop - c_p->htop < heap_need) {
(void) erts_garbage_collect(c_p, heap_need, reg, live+1);
// Re-read the binary term from the register after the collection
bin = reg[live];
}
hp = c_p->htop;
/*
* Calculate sizes. The size of the new binary, is the sum of the
* build size and the size of the old binary. Allow some room
* for growing.
*/
ERTS_GET_BINARY_BYTES(bin, src_bytes, bitoffs, bitsize);
erts_bin_offset = 8*binary_size(bin) + bitsize;
if (unit > 1) {
if ((unit == 8 && (erts_bin_offset & 7) != 0) ||
(erts_bin_offset % unit) != 0) {
goto badarg;
}
}
used_size_in_bits = erts_bin_offset + build_size_in_bits;
used_size_in_bytes = NBYTES(used_size_in_bits);
bin_size = 2*used_size_in_bytes;
// At least 256 bytes
bin_size = (bin_size < 256) ? 256 : bin_size;
/*
* Allocate the binary data struct itself.
*/
// Allocate a Binary of twice the required size (256 bytes minimum).
// It acts as the container for the data; the process heap only holds
// the refc binary (ProcBin) that refers to it
bptr = erts_bin_nrml_alloc(bin_size);
bptr->flags = 0;
bptr->orig_size = bin_size;
erts_refc_init(&bptr->refc, 1);
erts_current_bin = (byte *) bptr->orig_bytes;
erts_fprintf(stderr, "bptr:%p, bin_size:%lu\n", bptr, bin_size);
/*
* Now allocate the ProcBin on the heap.
*/
// Create the refc binary (ProcBin) on the process heap, referring to the
// Binary allocated above, and link it into the process off-heap list
pb = (ProcBin *) hp;
hp += PROC_BIN_SIZE;
pb->thing_word = HEADER_PROC_BIN;
// Set to the size actually needed now; later appends can extend it
pb->size = used_size_in_bytes;
pb->next = MSO(c_p).first;
MSO(c_p).first = (struct erl_off_heap_header*)pb;
pb->val = bptr;
pb->bytes = (byte*) bptr->orig_bytes;
pb->flags = PB_IS_WRITABLE | PB_ACTIVE_WRITER;
OH_OVERHEAD(&(MSO(c_p)), pb->size / sizeof(Eterm));
/*
* Now allocate the sub binary and set its size to include the
* data about to be built.
*/
// Create the sub binary that refers to the ProcBin above, sized to the
// required number of bits
sb = (ErlSubBin *) hp;
hp += ERL_SUB_BIN_SIZE;
sb->thing_word = HEADER_SUB_BIN;
sb->size = BYTE_OFFSET(used_size_in_bits);
sb->bitsize = BIT_OFFSET(used_size_in_bits);
sb->offs = 0;
sb->bitoffs = 0;
sb->is_writable = 1;
sb->orig = make_binary(pb);
c_p->htop = hp;
/*
* Now copy the data into the binary.
*/
copy_binary_to_buffer(erts_current_bin, 0, src_bytes, bitoffs, erts_bin_offset);
// For test convenience, dump only the first 20 bytes
print_bin("dst_bytes:", erts_current_bin, 20);
erts_fprintf(stderr,"-------- new_heap_top:%p --------\n\n", c_p->htop);
return make_binary(sb);
}
}
實例1:在Erlang Shell中演示erlang的binary append操作過程
修改完Erlang C源碼後,編譯並啓動Erlang Shell,運行測試代碼:
Bin0 = <<1>>,
Bin1 = <<Bin0/binary,2>>,
Bin2 = <<Bin1/binary,3>>,
Bin3 = <<Bin1/binary,4>>,
{Bin2,Bin3}.
結果如下(#開頭的表示註釋):
Eshell V5.10.2 (abort with ^G)
1> Bin0 = <<1>>,
1> Bin1 = <<Bin0/binary,2>>,
1> Bin2 = <<Bin1/binary,3>>,
1> Bin3 = <<Bin1/binary,4>>,
1> {Bin2,Bin3}.
*** append start *** build_size_in_bits:8, heap_top:0x01b1aa24
not_sub_bin, not_writable, header:64
bptr:0x01c41d18, bin_size:256
dst_bytes: {0,0,196,1,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255} (len:20)
-------- new_heap_top:0x01b1aa50 --------
*** append start *** build_size_in_bits:8, heap_top:0x01b09090
not_sub_bin, not_writable, header:64
bptr:0x01c41d18, bin_size:256
dst_bytes: {1,0,196,1,255,255,15,15,15,15,15,15,15,15,15,15,15,15,15,15} (len:20)
-------- new_heap_top:0x01b090bc --------
*** append start *** build_size_in_bits:8, heap_top:0x01b09164
pb->bytes: {1} (len:1)
writable
pb->size: from 1 extend to 2
Binary Size:256, Binary Refc:1
new pb->bytes: {1,0} (len:2)
--- new_sub_binary_ok --- new_heap_top:0x01b09178
#上面兩段是 Bin1 = <<Bin0/binary, 2>> 執行過程中調用append函數的輸出,
#首先看到not_sub_bin,header爲64表示它是一個heap binary,不可寫。
#接着創建了一個容器binary,大小爲256字節,並且複製了Bin0,
#dst_bytes第二字節以後都是可被寫的空間,這裏看到後面有內容是因爲分配空間後沒有清0
#申請了binary容器後,繼續調用append進行擴展ProcBin的長度,從1擴展到2,用於容納<<Bin0/binary,2>>
*** append start *** build_size_in_bits:16, heap_top:0x01b1326c
not_sub_bin, not_writable, header:64
bptr:0x01c41e38, bin_size:256
dst_bytes: {255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,170,170,170,170} (len:20)
-------- new_heap_top:0x01b13298 --------
*** append start *** build_size_in_bits:8, heap_top:0x01b13340
pb->bytes: {1,2} (len:2)
writable
pb->size: from 2 extend to 3
Binary Size:256, Binary Refc:1
new pb->bytes: {1,2,255} (len:3)
--- new_sub_binary_ok --- new_heap_top:0x01b13354
#上面兩段是 Bin2 = <<Bin1/binary, 3>> 執行過程中調用append函數的輸出,
#仍然看到Bin1被複制後再進行擴展到3字節,
#按上一節內容來說,這是不應該的,這裏到底發生了什麼?
*** append start *** build_size_in_bits:16, heap_top:0x025bbb58
not_sub_bin, not_writable, header:64
bptr:0x01c41d18, bin_size:256
dst_bytes: {1,2,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15} (len:20)
-------- new_heap_top:0x025bbb84 --------
*** append start *** build_size_in_bits:8, heap_top:0x025bbc2c
pb->bytes: {1,2} (len:2)
writable
pb->size: from 2 extend to 3
Binary Size:256, Binary Refc:1
new pb->bytes: {1,2,15} (len:3)
--- new_sub_binary_ok --- new_heap_top:0x025bbc40
#上面兩段是 Bin3 = <<Bin1/binary, 4>> 執行過程中調用append函數的輸出,
#這裏的Bin1被複制是正常的,但是,引起複製的原因是not_sub_bin,仍然是heap binary,
#按上一節的內容,這裏應該是: not_writable is_writable==0
#爲什麼Bin1是heap binary?
{<<1,2,3>>,<<1,2,4>>}
實例2:將代碼寫入文件中編譯後測試erlang的binary append操作過程
test.erl
-module(test).
-export([t/4]).
-export([t/0]).
%% Builds Bin2 and Bin3 by appending one byte each to Bin1.
%% Every operand here is a literal.
t() ->
Bin0 = <<1>>,
Bin1 = <<Bin0/binary,2>>,
Bin2 = <<Bin1/binary,3>>,
Bin3 = <<Bin1/binary,4>>,
{Bin2,Bin3}.
%% Same construction as t/0, but every byte comes from an argument.
t(A1, A2, A3, A4) ->
Bin0 = <<A1>>,
Bin1 = <<Bin0/binary,A2>>, %% append 1: Bin0 is a heap binary, not writable
Bin2 = <<Bin1/binary,A3>>, %% append 2: Bin1 is writable, and is then marked as no longer writable
Bin3 = <<Bin1/binary,A4>>, %% append 3: Bin1 can no longer be written
{Bin2,Bin3}.
先看看test:t/0:
Eshell V5.10.2 (abort with ^G)
1> test:t().
{<<1,2,3>>,<<1,2,4>>}
發現沒有調試輸出,猜測是沒有調用append函數。
繼續看看test:t/4的結果。
Eshell V5.10.2 (abort with ^G)
1> test:t(1,2,3,4).
*** append start *** build_size_in_bits:8, heap_top:0x01b09598
not_sub_bin, not_writable, header:A4
bptr:0x01c41220, bin_size:256
dst_bytes: {1,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170} (len:20)
-------- new_heap_top:0x01b095c4 --------
*** append start *** build_size_in_bits:8, heap_top:0x01b095c4
pb->bytes: {1,2} (len:2)
writable
pb->size: from 2 extend to 3
Binary Size:256, Binary Refc:1
new pb->bytes: {1,2,170} (len:3)
--- new_sub_binary_ok --- new_heap_top:0x01b095d8
*** append start *** build_size_in_bits:8, heap_top:0x01b095d8
not_writable is_writable==0
bptr:0x01c40040, bin_size:256
dst_bytes: {1,2,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170} (len:20)
-------- new_heap_top:0x01b09604 --------
{<<1,2,3>>,<<1,2,4>>}
上面三段輸出剛好對應代碼中的三個append操作,而且結果也和上一節所說的內容一致了。上面的三個測試結果,讓我們產生了很多不解,下面就將這些謎團一一解開。
先將test.erl通過下面的命令編譯成erlang 指令。
erlc +'S' test.erl
test.S
{function, t, 0, 2}.
{label,1}.
{line,[{location,"test.erl",4}]}.
{func_info,{atom,test},{atom,t},0}.
{label,2}.
{move,{literal,{<<1,2,3>>,<<1,2,4>>}},{x,0}}.
return.
{function, t, 4, 4}.
{label,3}.
{line,[{location,"test.erl",11}]}.
{func_info,{atom,test},{atom,t},4}.
{label,4}.
{line,[{location,"test.erl",12}]}.
{bs_init2,{f,0},1,0,4,{field_flags,[]},{x,4}}.
{bs_put_integer,{f,0},{integer,8},1,{field_flags,[unsigned,big]},{x,0}}.
{bs_append,{f,0},{integer,8},0,4,8,{x,4},{field_flags,[]},{x,0}}.
{bs_put_integer,{f,0},{integer,8},1,{field_flags,[unsigned,big]},{x,1}}.
{bs_append,{f,0},{integer,8},0,4,8,{x,0},{field_flags,[]},{x,1}}.
{bs_put_integer,{f,0},{integer,8},1,{field_flags,[unsigned,big]},{x,2}}.
{bs_append,{f,0},{integer,8},3,4,8,{x,0},{field_flags,[]},{x,2}}.
{bs_put_integer,{f,0},{integer,8},1,{field_flags,[unsigned,big]},{x,3}}.
{put_tuple,2,{x,0}}.
{put,{x,1}}.
{put,{x,2}}.
return.
從上面可以看到,test:t/0已經被編譯器處理成{<<1,2,3>>,<<1,2,4>>},故不會再調用append,而test:t/4是通過參數動態創建的,從中可以看到它調用的函數及執行流程。
最不解的是在Erlang Shell中執行的結果,爲什麼和預期的相差這麼遠?
Erlang的二進制append操作過程中,由於ProcBin(見#MARK_C處)所引用的binary在擴展空間時可能會被移動,此時就必須更新ProcBin的引用指針(ProcBin->val),如果這時有別的ProcBin引用了這個binary,就可能會出問題,這也違反了變量不可變的原則,
所以,在append操作的過程中,如果中間執行了一些會影響ProcBin的指令,sub binary就會被設置爲以後不可再寫。
- 當作結果返回
- 當作消息被髮送(PortOrPid ! Bin1)
- 當作普通變量插入ETS表
- 當作普通變量進行二進制匹配操作
在Erlang Shell中,測試代碼可以寫成這樣:
Bin0 = <<1>>.
Bin1 = <<Bin0/binary,2>>.
Bin2 = <<Bin1/binary,3>>.
Bin3 = <<Bin1/binary,4>>.
{Bin2,Bin3}.
這就說明,每一行都像一個函數一樣,執行結果將會被返回保存。例如,在Erlang Shell中,Bin3 = <<Bin1/binary,4>> 這一句的執行過程如下:
// 節選自beam_emu.c
do_bs_init_bits_known:
// 此處省略N行。。。
erts_bin_offset = 0;
erts_writable_bin = 0;
hb = (ErlHeapBin *) HTOP;
HTOP += heap_bin_size(num_bytes);
hb->thing_word = header_heap_bin(num_bytes);
hb->size = num_bytes;
erts_current_bin = (byte *) hb->data;
new_binary = make_binary(hb);
接着進行append操作,最後讀取結果:
// 節選自beam_emu.c
do_bs_get_binary_all_reuse_common:
orig = mb->orig;
sb = (ErlSubBin *) boxed_val(context_to_binary_context);
hole_size = 1 + header_arity(sb->thing_word) - ERL_SUB_BIN_SIZE;
sb->thing_word = HEADER_SUB_BIN;
sb->size = BYTE_OFFSET(size);
sb->bitsize = BIT_OFFSET(size);
sb->offs = BYTE_OFFSET(offs);
sb->bitoffs = BIT_OFFSET(offs);
// 設爲不可寫
sb->is_writable = 0;
sb->orig = orig;
if (hole_size) {
sb[1].thing_word = make_pos_bignum_header(hole_size-1);
}
// ...
由於Erlang Shell中每一行的執行結果都會被返回保存,所以打斷了append連續優化的操作,出現了不是我們預期的結果。
小結
- 在Erlang編程中,我們要了解Binary二進制的創建場景是否會發揮append的優化特性,特別是在網絡編程中的收包解包,要充分利用這一特性以提高效率。
- 編譯器會對erl代碼進行優化,測試時要注意這種優化是否會影響測試結果。
- Erlang Shell中執行代碼時,要了解它的執行流程及與文件代碼的區別,以免出現莫名的情況。
- erl代碼可以通過erlc +'E' file.erl和erlc +'S' file.erl生成代碼的擴展文件和指令文件,觀察程序執行過程。