C++ STL chrono和clock_gettime的性能對比

#include <chrono>
#include <cstdint>
#include <ctime>
#include <iostream>

// Micro-benchmark comparing the cost of reading the current time through
// std::chrono (steady_clock, high_resolution_clock, system_clock) and through
// raw clock_gettime (CLOCK_MONOTONIC, CLOCK_REALTIME).
//
// Each section performs `iterations` back-to-back reads of one clock source and
// prints one line with: the accumulated checksum `ms`, the iteration count and
// the wall time of the loop in milliseconds. The loop duration is always
// measured with steady_clock, whatever clock is being benchmarked.
//
// `ms` only exists to consume every value that was read, so the clock calls
// cannot be discarded as dead code. It is a sum over all iterations, so it
// grows with `iterations`; raising the count substantially could overflow it.
//
// Returns 0.
int main()
{
	using std::chrono::duration_cast;
	using std::chrono::milliseconds;
	using std::chrono::steady_clock;

	const int iterations = 1000000;

	// --- std::chrono::steady_clock ---
	std::int64_t ms = 0;
	auto beg = steady_clock::now();
	for (int i = 0; i < iterations; ++i)
	{
		const steady_clock::time_point now = steady_clock::now();

		ms += duration_cast<milliseconds>(now - beg).count();
	}
	auto end = steady_clock::now();
	std::cout << "run steady_clock " << ms << " times " << iterations << " time cost (ms) = "
		<< duration_cast<milliseconds>(end - beg).count() << std::endl;

	// --- std::chrono::high_resolution_clock ---
	// A separate base point is needed: time_points of different clocks cannot be subtracted.
	ms = 0;
	beg = steady_clock::now();
	const std::chrono::high_resolution_clock::time_point hr_base = std::chrono::high_resolution_clock::now();
	for (int i = 0; i < iterations; ++i)
	{
		const std::chrono::high_resolution_clock::time_point now = std::chrono::high_resolution_clock::now();

		ms += duration_cast<milliseconds>(now - hr_base).count();
	}
	end = steady_clock::now();
	std::cout << "run high_resolution_clock " << ms << " times " << iterations << " time cost (ms) = "
		<< duration_cast<milliseconds>(end - beg).count() << std::endl;

	// --- std::chrono::system_clock ---
	ms = 0;
	beg = steady_clock::now();
	const std::chrono::system_clock::time_point sys_base = std::chrono::system_clock::now();
	for (int i = 0; i < iterations; ++i)
	{
		const std::chrono::system_clock::time_point now = std::chrono::system_clock::now();

		ms += duration_cast<milliseconds>(now - sys_base).count();
	}
	end = steady_clock::now();
	std::cout << "run system_clock " << ms << " times " << iterations << " time cost (ms) = "
		<< duration_cast<milliseconds>(end - beg).count() << std::endl;

	// --- clock_gettime(CLOCK_MONOTONIC) ---
	// The return value of clock_gettime is deliberately not checked, so the
	// timed loop contains nothing but the call and the accumulation.
	ms = 0;
	beg = steady_clock::now();
	for (int i = 0; i < iterations; ++i)
	{
		struct timespec tp;
		clock_gettime(CLOCK_MONOTONIC, &tp);

		// Widen before multiplying so the product is computed in 64 bits even
		// where time_t is narrower.
		ms += static_cast<std::int64_t>(tp.tv_sec) * 1000 + tp.tv_nsec / 1000000;
	}
	end = steady_clock::now();
	std::cout << "run clock_gettime MONOTONIC " << ms << " times " << iterations << " time cost (ms) = "
		<< duration_cast<milliseconds>(end - beg).count() << std::endl;

	// --- clock_gettime(CLOCK_REALTIME) ---
	ms = 0;
	beg = steady_clock::now();
	for (int i = 0; i < iterations; ++i)
	{
		struct timespec tp;
		clock_gettime(CLOCK_REALTIME, &tp);

		ms += static_cast<std::int64_t>(tp.tv_sec) * 1000 + tp.tv_nsec / 1000000;
	}
	end = steady_clock::now();
	std::cout << "run clock_gettime REALTIME " << ms << " times " << iterations << " time cost (ms) = "
		<< duration_cast<milliseconds>(end - beg).count() << std::endl;

	return 0;
}

測試結果:

$ g++ test.cpp
$ ./a.out 
run steady_clock 25637513 times 1000000 time cost (ms) = 52
run high_resolution_clock 27368735 times 1000000 time cost (ms) = 55
run system_clock 27297934 times 1000000 time cost (ms) = 55
run clock_gettime MONOTONIC 2068584029758 times 1000000 time cost (ms) = 40
run clock_gettime REALTIME 1650699050496181143 times 1000000 time cost (ms) = 40

$ g++ -O2 test.cpp
$ ./a.out 
run steady_clock 22595347 times 1000000 time cost (ms) = 46
run high_resolution_clock 22716547 times 1000000 time cost (ms) = 46
run system_clock 22660435 times 1000000 time cost (ms) = 46
run clock_gettime MONOTONIC 2107850749483 times 1000000 time cost (ms) = 40
run clock_gettime REALTIME 1650699089762857666 times 1000000 time cost (ms) = 40

執行1000000次獲取時間,在不優化的情況下,chrono耗時52~55ms,clock_gettime一直穩定在40ms;使用-O2優化後,chrono降到46ms左右,依然比clock_gettime要慢(clock_gettime的實現在glibc庫裏,是已經編譯好的,所以測試程序優化不優化對它基本沒什麼影響)。chrono雖然要慢一些,但有更好的通用性,所以還是值得用chrono替換clock_gettime的。

瞅了一眼libstdc++(GCC的C++標準庫,chrono是在這裏實現的,而不是glibc)的chrono源碼,發現它在linux下也只是把clock_gettime包裝了一下

    // Returns the current steady_clock time.
    // If _GLIBCXX_USE_CLOCK_MONOTONIC is defined, CLOCK_MONOTONIC is read into a
    // timespec and converted to a time_point; otherwise the value of
    // system_clock::now() is reused.
    steady_clock::time_point
    steady_clock::now() noexcept
    {
#ifdef _GLIBCXX_USE_CLOCK_MONOTONIC
      timespec tp;
      // -EINVAL, -EFAULT
      // (the result of the call below is not checked in either branch)
#ifdef _GLIBCXX_USE_CLOCK_GETTIME_SYSCALL
      // Invoke the clock_gettime system call directly through syscall().
      syscall(SYS_clock_gettime, CLOCK_MONOTONIC, &tp);
#else
      // Go through the clock_gettime library function.
      clock_gettime(CLOCK_MONOTONIC, &tp);
#endif
      // Combine the seconds and nanoseconds fields into one duration.
      return time_point(duration(chrono::seconds(tp.tv_sec)
				 + chrono::nanoseconds(tp.tv_nsec)));
#else
      // No monotonic clock configured: reuse system_clock's time since its epoch.
      return time_point(system_clock::now().time_since_epoch());
#endif
    }

而這些多出來的時間,都是花在time_point這些包裝上了。

不過奇怪的是,在另一臺性能較差的筆記本上,chrono的性能差距被放大了:

$ g++ test.cpp
$ ./a.out 
run steady_clock 70294691 times 1000000 time cost (ms) = 136
run high_resolution_clock 65364921 times 1000000 time cost (ms) = 128
run system_clock 65016942 times 1000000 time cost (ms) = 127
run clock_gettime MONOTONIC 211863588796 times 1000000 time cost (ms) = 64
run clock_gettime REALTIME 1650700979815349233 times 1000000 time cost (ms) = 62
$ g++ -O2 test.cpp
$ ./a.out 
run steady_clock 52353254 times 1000000 time cost (ms) = 93
run high_resolution_clock 35520387 times 1000000 time cost (ms) = 76
run system_clock 41331069 times 1000000 time cost (ms) = 82
run clock_gettime MONOTONIC 226324539461 times 1000000 time cost (ms) = 60
run clock_gettime REALTIME 1650700994275088945 times 1000000 time cost (ms) = 57

可以看到,在不優化的時候,chrono消耗的時間是clock_gettime的2倍還多,即使在O2優化後,差距也還是要大一些。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章