#include <windows.h>

#include <chrono>
#include <iostream>
#include <string>
/*
 * StringLength1 - length of a NUL-terminated byte string via `repne scasb`.
 *
 * Hand-written 32-bit x86 routine. The function is declared naked, so the
 * assembly below builds and tears down its own stack frame and pops its
 * single 4-byte argument on return (`retn 4`).
 *
 * pszStr  : pointer to the string; may be NULL.
 * returns : 0 when pszStr is NULL, otherwise the number of bytes that
 *           precede the first NUL byte.
 *
 * Notes:
 *  - EDI is callee-saved on x86 Windows and no compiler prolog exists here
 *    to preserve it, so it is pushed/popped explicitly.
 *  - The scan runs forward, i.e. it relies on the direction flag being clear.
 *  - The original author speculated that `repne scasd` might be faster; be
 *    aware that scasd compares whole dwords against EAX, so it cannot by
 *    itself locate a single zero byte.
 */
__declspec(naked) DWORD WINAPI StringLength1(LPCSTR pszStr)
{
    __asm
    {
        push    ebp
        mov     ebp, esp
        push    edi                 ; preserve callee-saved EDI
        or      ecx, 0xffffffff     ; ECX = maximum repeat count
        mov     edi, [ebp+8]        ; EDI = pszStr
        xor     eax, eax            ; AL = 0 (byte to search for); also the NULL-input result
        test    edi, edi
        jz      _fexit
        repne   scasb               ; advance until the NUL byte has been consumed
        not     ecx                 ; ECX = bytes scanned, terminator included
        mov     eax, ecx
        dec     eax                 ; exclude the terminator
    _fexit:
        pop     edi                 ; restore EDI on both paths
        leave
        retn    4
    }
}
/*
 * StringLength2 - conventional byte-by-byte implementation.
 *
 * Returns 0 for a null pointer, otherwise the count of characters that
 * precede the terminating NUL.
 */
DWORD StringLength2(LPCSTR pszStr)
{
    if (pszStr == nullptr)
    {
        return 0;
    }

    DWORD length = 0;
    for (LPCSTR cursor = pszStr; *cursor != '\0'; ++cursor)
    {
        ++length;
    }
    return length;
}
// MAIN
int main(int, char **)
{
// 十億個字符
char* test = new char[1000000000];
memset(test, 65, 1000000000-1);
using namespace std;
using namespace chrono;
double cz1, cz2;
// 計算 StringLength1 執行時間
auto start = system_clock::now();
DWORD len1 = StringLength1(test);
auto end = system_clock::now();
auto dura = duration_cast<microseconds>(end - start);
cz1 = double(dura.count()) * microseconds::period::num / microseconds::period::den;
// 計算 StringLength2 執行時間
start = system_clock::now();
DWORD len2 = StringLength2(test);
end = system_clock::now();
dura = duration_cast<microseconds>(end - start);
cz2 = double(dura.count()) * microseconds::period::num / microseconds::period::den;
std::cout << "len1: " << len1 << ", cz1: " << cz1 << std::endl;
std::cout << "len2: " << len2 << ", cz2: " << cz2 << std::endl;
delete test;
return 0;
}
結果:使用 repne scasb 的 StringLength1 比常規實現的 StringLength2 快 4 倍左右。
但也沒啥用……