系列文章導航:《新的職業目標,以及C++性能優化》
基於對象的消息隊列的性能優化checklist
一般地,消息隊列的項要麼採用添加type字段表示項類型,消息處理邏輯根據不同的type進入不同的分支,要麼會以OO的思維使用多態的方法進行消息處理。
前者典型的如MFC的消息循環,後者就比如是reSIProcate。
對消息的處理邏輯不在本博客的討論範圍內。但就消息隊列而言,影響性能的因素主要有兩點,第一點是隊列的同步,第二點是消息對象的分配銷燬。
關於隊列同步的優化,在以併發編程爲主題的書籍或文章中有深入的講解。
關於消息對象分配銷燬的優化,可以使用如下方法:
(1)使用內存池塊存儲消息對象。
(2)定義類專屬的new/delete操作符。
(3)使用指定分配地址的new操作符。
筆者在自己的臺式機上進行對比測試,機器配置:win10-64位/16G*1內存/三星SSD硬盤/i5-6500處理器。
對比的結果非常明顯,在指定位置調用new分配內存,比從OS獲取內存的性能要高出2~3個數量級(依塊大小而異:按下面的實測數據,小塊約高出1個數量級,1M以上的塊可達4~5個數量級)。以下是測試代碼:
#include <Windows.h>

#include <cstddef>
#include <iostream>
#include <new>
using namespace std;
/*
測試不同new的耗時。
(1)從指定地址new
(2)每次new都從OS獲取地址
*/
void test_new_consume_on_heap()
{
/*
在我的臺式機上測試結果如下:
1000*100 new[1B]-on-heap consume 9709 ticks,avg 310.688 ns per new+delete
1000*100 new[1k]-on-heap consume 10802 ticks,avg 345.664 ns per new+delete
1000*100 new[4k]-on-heap consume 20785 ticks,avg 665.12 ns per new+delete
1000*100 new[32k]-on-heap consume 60765 ticks,avg 1944.48 ns per new+delete
1000*1 new[1M]-on-heap consume 1847939 ticks,avg 591340 ns per new+delete
1000*1 new[4M]-on-heap consume 8043361 ticks,avg 2573875.520000 ns per new+delete
看得出,當數組較小時,new+delete的耗時很小,數組越大越耗時。
*/
LARGE_INTEGER start,stop,freqency;
QueryPerformanceFrequency(&freqency);
double ns_per_tick=1000*1000*1000/freqency.QuadPart,avg=0;
int counters = 1000*10;
char *pc=NULL;
QueryPerformanceCounter(&start);
for(int i=0;i<counters;++i)
{
pc = new char[1];
//do-something
delete []pc;
}
QueryPerformanceCounter(&stop);
avg = (stop.QuadPart-start.QuadPart)*ns_per_tick/counters;
cout<<"1000*100 new[1B]-on-heap consume "<<(stop.QuadPart-start.QuadPart)<<" ticks,avg "<<avg<<" ns per new+delete"<<endl;
QueryPerformanceCounter(&start);
for(int i=0;i<counters;++i)
{
pc = new char[1024];
//do-something
delete []pc;
}
QueryPerformanceCounter(&stop);
avg = (stop.QuadPart-start.QuadPart)*ns_per_tick/counters;
cout<<"1000*100 new[1k]-on-heap consume "<<(stop.QuadPart-start.QuadPart)<<" ticks,avg "<<avg<<" ns per new+delete"<<endl;
QueryPerformanceCounter(&start);
for(int i=0;i<counters;++i)
{
pc = new char[4096];
//do-something
delete []pc;
}
QueryPerformanceCounter(&stop);
avg = (stop.QuadPart-start.QuadPart)*ns_per_tick/counters;
cout<<"1000*100 new[4k]-on-heap consume "<<(stop.QuadPart-start.QuadPart)<<" ticks,avg "<<avg<<" ns per new+delete"<<endl;
QueryPerformanceCounter(&start);
for(int i=0;i<counters;++i)
{
pc = new char[32768];
//do-something
delete []pc;
}
QueryPerformanceCounter(&stop);
avg = (stop.QuadPart-start.QuadPart)*ns_per_tick/counters;
cout<<"1000*100 new[32k]-on-heap consume "<<(stop.QuadPart-start.QuadPart)<<" ticks,avg "<<avg<<" ns per new+delete"<<endl;
counters = 1000*1;
QueryPerformanceCounter(&start);
for(int i=0;i<counters;++i)
{
pc = new char[1048576];
//do-something
delete []pc;
}
QueryPerformanceCounter(&stop);
avg = (stop.QuadPart-start.QuadPart)*ns_per_tick/counters;
cout<<"1000*1 new[1M]-on-heap consume "<<(stop.QuadPart-start.QuadPart)<<" ticks,avg "<<avg<<" ns per new+delete"<<endl;
QueryPerformanceCounter(&start);
for(int i=0;i<counters;++i)
{
pc = new char[4194304];
//do-something
delete []pc;
}
QueryPerformanceCounter(&stop);
avg = (stop.QuadPart-start.QuadPart)*ns_per_tick/counters;
cout<<"1000*1 new[4M]-on-heap consume "<<(stop.QuadPart-start.QuadPart)<<" ticks,avg "<<std::fixed <<avg<<" ns per new+delete"<<endl;
}
// Statically allocated scratch buffers, one per benchmarked size. They serve
// as the target addresses for the placement-new calls in this file.
char c1B[1];
char c1k[1 << 10];
char c4k[4 << 10];
char c32k[32 << 10];
char c1M[1 << 20];
char c4M[4 << 20];
void test_new_consume_on_stack()
{
/*
在我的臺式機上測試結果如下:
1000*100 new[1B]-on-heap consume 466 ticks,avg 14.912 ns per new+delete
1000*100 new[1k]-on-heap consume 382 ticks,avg 12.224 ns per new+delete
1000*100 new[4k]-on-heap consume 692 ticks,avg 22.144 ns per new+delete
1000*100 new[32k]-on-heap consume 388 ticks,avg 12.416 ns per new+delete
1000*100 new[1M]-on-heap consume 4178 ticks,avg 13.3696 ns per new+delete
1000*100 new[4M]-on-heap consume 6245 ticks,avg 19.984000 ns per new+delete
看得出,數組大小對分配耗時的操作並不大。
但是需要注意的是,在指定位置調用new的內存不能使用delete銷燬!!!
*/
LARGE_INTEGER start,stop,freqency;
QueryPerformanceFrequency(&freqency);
double ns_per_tick=1000*1000*1000/freqency.QuadPart,avg=0;
int counters = 1000*10;
char *pc=NULL;
QueryPerformanceCounter(&start);
for(int i=0;i<counters;++i)
{
pc = new(c1B) char[1];
//do-something
//delete []pc;
}
QueryPerformanceCounter(&stop);
avg = (stop.QuadPart-start.QuadPart)*ns_per_tick/counters;
cout<<"1000*100 new[1B]-on-heap consume "<<(stop.QuadPart-start.QuadPart)<<" ticks,avg "<<avg<<" ns per new+delete"<<endl;
QueryPerformanceCounter(&start);
for(int i=0;i<counters;++i)
{
pc = new(c1k) char[1024];
//do-something
//delete []pc;
}
QueryPerformanceCounter(&stop);
avg = (stop.QuadPart-start.QuadPart)*ns_per_tick/counters;
cout<<"1000*100 new[1k]-on-heap consume "<<(stop.QuadPart-start.QuadPart)<<" ticks,avg "<<avg<<" ns per new+delete"<<endl;
QueryPerformanceCounter(&start);
for(int i=0;i<counters;++i)
{
pc = new(c4k) char[4096];
//do-something
//delete []pc;
}
QueryPerformanceCounter(&stop);
avg = (stop.QuadPart-start.QuadPart)*ns_per_tick/counters;
cout<<"1000*100 new[4k]-on-heap consume "<<(stop.QuadPart-start.QuadPart)<<" ticks,avg "<<avg<<" ns per new+delete"<<endl;
QueryPerformanceCounter(&start);
for(int i=0;i<counters;++i)
{
pc = new(c32k) char[32768];
//do-something
//delete []pc;
}
QueryPerformanceCounter(&stop);
avg = (stop.QuadPart-start.QuadPart)*ns_per_tick/counters;
cout<<"1000*100 new[32k]-on-heap consume "<<(stop.QuadPart-start.QuadPart)<<" ticks,avg "<<avg<<" ns per new+delete"<<endl;
counters = 1000*100;
QueryPerformanceCounter(&start);
for(int i=0;i<counters;++i)
{
pc = new(c1M) char[1048576];
//do-something
//delete []pc;
}
QueryPerformanceCounter(&stop);
avg = (stop.QuadPart-start.QuadPart)*ns_per_tick/counters;
cout<<"1000*100 new[1M]-on-heap consume "<<(stop.QuadPart-start.QuadPart)<<" ticks,avg "<<avg<<" ns per new+delete"<<endl;
QueryPerformanceCounter(&start);
for(int i=0;i<counters;++i)
{
pc = new(c4M) char[4194304];
//do-something
//delete []pc;
}
QueryPerformanceCounter(&stop);
avg = (stop.QuadPart-start.QuadPart)*ns_per_tick/counters;
cout<<"1000*100 new[4M]-on-heap consume "<<(stop.QuadPart-start.QuadPart)<<" ticks,avg "<<std::fixed <<avg<<" ns per new+delete"<<endl;
}