當涉及到多字節的數據類型(如 uint16_t
、uint32_t
等)時,字節在內存中的存儲順序會影響到數據的解釋方式。這個存儲順序可以分爲兩種:大端對齊(Big Endian)和小端對齊(Little Endian)。
大端對齊(Big Endian):
在大端對齊中,數據的高字節(Most Significant Byte,MSB)存儲在內存的低地址,而數據的低字節(Least Significant Byte,LSB)存儲在內存的高地址。舉個例子,考慮一個 16 位無符號整數 0x1234
:
-
內存中的存儲順序是:
0x12
(低地址)0x34
(高地址)。 -
在二進制流中,高位字節排在前面,低位字節排在後面( 12 34)。
小端對齊(Little Endian):
在小端對齊中,數據的低字節(LSB)存儲在內存的低地址,而數據的高字節(MSB)存儲在內存的高地址。以同樣的例子 0x1234
爲例:
-
內存中的存儲順序是:
0x34
(低地址)0x12
(高地址)。 -
在二進制流中,低位字節排在前面,高位字節排在後面( 34 12)。
在現代計算機中,數據通常是按照小端存儲的。這意味着在多字節數據類型(如整數、浮點數等)的存儲中,最低有效字節(Least Significant Byte,LSB)存儲在最低地址,而最高有效字節(Most Significant Byte,MSB)存儲在最高地址。
對於 std::memcpy
函數,它只是簡單地從源地址開始,按照字節順序連續地複製數據到目標地址。它不會考慮數據的大小端存儲方式。因此,無論源數據是小端存儲還是大端存儲,std::memcpy
都會按照字節的順序進行拷貝。這就意味着,當你使用 std::memcpy
從一個變量複製到另一個變量時,字節的存儲順序會被保留。
進一步解釋,如果計算機裏定義了變量 uint16_t testa=0x1234。它在計算機裏面是小端對齊(一般的計算機都是小端對齊)存放的,存放格式是 34 12。
因此,在將小端對齊的二進制流,使用memcpy拷貝的時候,比如拷貝2個字節,賦值給 uint16_t testb 的時候,它是不需要進行 高低位轉換的。計算機裏面存的順序,就是 小端對齊 存的順序。
大端對齊,就不同了。如果計算機裏定義了變量 uint16_t testa=0x1234。它在計算機裏面是小端對齊(一般的計算機都是小端對齊)存放的,存放格式是 34 12
此時我們有一個 大端對齊的二進制流,它裏面如果存放了testa,它在這個二進制流裏面存放格式是 12 34 ,但是它在計算機中存放格式應該是 34 12(小端對齊格式)。
因此,解析大端對齊的二進制流時,要從後往前取數據:先取流中排在後面的字節 34,把它放到變量的低地址(在小端主機的內存裏,34 是低地址位的數據),再取排在前面的 12,放到高地址。
最後,由單字節字符(char)組成的字符串沒有大小端對齊的概念,直接 memcpy 即可;但 UTF-16、UTF-32 這類以多字節爲編碼單元的字符串,仍然會受到字節序的影響。
這下面的代碼是 將數字或者字符串轉爲 小端對齊,大端對齊;再從 二進制流反向解析
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <type_traits>
#include <vector>

/// Reports whether the host stores the least significant byte of a
/// multi-byte integer at the lowest address.
///
/// @return true on a little-endian host, false otherwise.
bool isLittleEndian() {
    const uint32_t probe = 0x01020304;
    uint8_t lowestAddressByte = 0;
    // Inspect the byte at the lowest address without any pointer type punning.
    std::memcpy(&lowestAddressByte, &probe, sizeof(lowestAddressByte));
    return lowestAddressByte == 0x04;
}

namespace {

/// Appends `size` bytes starting at `data` to `block`, front-to-back or
/// back-to-front depending on `reversed`.
void appendBytes(std::vector<uint8_t>& block, const void* data, size_t size, bool reversed) {
    const uint8_t* bytes = static_cast<const uint8_t*>(data);
    if (bytes == nullptr || size == 0) {
        return;
    }
    if (reversed) {
        block.insert(block.end(),
                     std::reverse_iterator<const uint8_t*>(bytes + size),
                     std::reverse_iterator<const uint8_t*>(bytes));
    } else {
        block.insert(block.end(), bytes, bytes + size);
    }
}

/// Throws std::out_of_range unless `count` bytes are available at `offset`.
void requireBytes(const std::vector<uint8_t>& block, size_t offset, size_t count) {
    if (offset > block.size() || block.size() - offset < count) {
        throw std::out_of_range("not enough bytes left in block");
    }
}

/// Reads one scalar of type T from `block` at `offset`, converting from the
/// stream's byte order to the host's, and advances `offset` past it.
template <typename T>
void readScalar(const std::vector<uint8_t>& block, size_t& offset, T& value,
                bool streamIsLittleEndian) {
    static_assert(std::is_trivially_copyable<T>::value,
                  "only trivially copyable types can be rebuilt from raw bytes");
    requireBytes(block, offset, sizeof(T));

    uint8_t raw[sizeof(T)];
    std::memcpy(raw, block.data() + offset, sizeof(T));
    // The bytes only need to be swapped when stream order and host order differ.
    if (streamIsLittleEndian != isLittleEndian()) {
        std::reverse(raw, raw + sizeof(T));
    }
    std::memcpy(&value, raw, sizeof(T));
    offset += sizeof(T);
}

/// Number of characters of `str` to serialize: stops at the first NUL but never
/// looks at more than `length` bytes. A null `str` counts as empty.
size_t boundedLength(const char* str, size_t length) {
    if (str == nullptr) {
        return 0;
    }
    const void* terminator = std::memchr(str, '\0', length);
    if (terminator == nullptr) {
        return length;
    }
    return static_cast<size_t>(static_cast<const char*>(terminator) - str);
}

/// Appends zero bytes until a field that already holds `written` bytes reaches
/// `paddedLength` bytes. Does nothing when the field is already that long.
void appendZeroPadding(std::vector<uint8_t>& block, size_t written, size_t paddedLength) {
    if (written < paddedLength) {
        block.insert(block.end(), paddedLength - written, static_cast<uint8_t>(0));
    }
}

/// Prints every byte of `block` as two hex digits followed by a space, then a
/// newline. The stream's formatting state is restored afterwards.
void dumpHex(const std::vector<uint8_t>& block) {
    const std::ios_base::fmtflags savedFlags = std::cout.flags();
    const char savedFill = std::cout.fill('0');
    std::cout << std::hex;
    for (const uint8_t byte : block) {
        std::cout << std::setw(2) << static_cast<int>(byte) << ' ';
    }
    std::cout << '\n';
    std::cout.flags(savedFlags);
    std::cout.fill(savedFill);
}

}  // namespace

//====================== Serialization of scalars

/// Appends the `size`-byte scalar stored at `data` (in host byte order) to
/// `block` in little-endian order, i.e. least significant byte first.
///
/// @param block  Destination byte stream.
/// @param data   Address of the scalar; a null pointer appends nothing.
/// @param size   Size of the scalar in bytes.
void appendLittleEndian(std::vector<uint8_t>& block, const void* data, size_t size) {
    // On a little-endian host the in-memory layout already is the wire layout.
    appendBytes(block, data, size, !isLittleEndian());
}

/// Appends the `size`-byte scalar stored at `data` (in host byte order) to
/// `block` in big-endian order, i.e. most significant byte first.
///
/// @param block  Destination byte stream.
/// @param data   Address of the scalar; a null pointer appends nothing.
/// @param size   Size of the scalar in bytes.
void appendBigEndian(std::vector<uint8_t>& block, const void* data, size_t size) {
    // On a little-endian host the in-memory bytes must be emitted back-to-front.
    appendBytes(block, data, size, isLittleEndian());
}

//======================= Parsing of scalars

/// Reads a little-endian scalar of type T from `block` at `offset`.
///
/// @param block   Source byte stream.
/// @param offset  Read position; advanced by sizeof(T) on success.
/// @param value   Receives the decoded value in host byte order.
/// @throws std::out_of_range if fewer than sizeof(T) bytes remain at `offset`.
template<typename T>
void parseLittleEndian(const std::vector<uint8_t>& block, size_t& offset, T& value) {
    readScalar(block, offset, value, /*streamIsLittleEndian=*/true);
}

/// Reads a big-endian scalar of type T from `block` at `offset`.
///
/// @param block   Source byte stream.
/// @param offset  Read position; advanced by sizeof(T) on success.
/// @param value   Receives the decoded value in host byte order.
/// @throws std::out_of_range if fewer than sizeof(T) bytes remain at `offset`.
template<typename T>
void parseBigEndian(const std::vector<uint8_t>& block, size_t& offset, T& value) {
    readScalar(block, offset, value, /*streamIsLittleEndian=*/false);
}

//=================== Strings

/// Appends a string field to `block`, zero-padded up to `paddedLength` bytes.
///
/// At most `length` characters of `str` are written, stopping early at the
/// first NUL. If that many characters already reach or exceed `paddedLength`,
/// no padding is added and the field is longer than `paddedLength`.
///
/// @param block         Destination byte stream.
/// @param str           Source characters; a null pointer is treated as empty.
/// @param length        Maximum number of characters taken from `str`.
/// @param paddedLength  Minimum size of the field in bytes.
void appendPaddedString(std::vector<uint8_t>& block, const char* str, size_t length, size_t paddedLength) {
    const size_t len = boundedLength(str, length);
    if (len > 0) {
        // A range insert from pointers knows the element count up front, so the
        // vector grows at most once for the whole string.
        block.insert(block.end(), str, str + len);
    }
    appendZeroPadding(block, len, paddedLength);
}

/// Reads a `length`-byte string field from `block` at `offset` into `str` and
/// NUL-terminates it.
///
/// The copy is clamped to the bytes actually available in `block`, so a field
/// width larger than the remaining data yields a shorter string instead of
/// reading past the end of the vector.
///
/// @param block   Source byte stream.
/// @param offset  Read position; advanced by the number of bytes consumed.
/// @param str     Destination buffer; must have room for `length + 1` chars.
///                A null pointer makes the call a no-op.
/// @param length  Width of the field in bytes.
void parsePaddedString(const std::vector<uint8_t>& block, size_t& offset, char* str, size_t length) {
    if (str == nullptr) {
        return;
    }
    const size_t available = (offset < block.size()) ? block.size() - offset : 0;
    const size_t count = std::min(length, available);
    if (count > 0) {
        std::memcpy(str, block.data() + offset, count);
    }
    str[count] = '\0';  // Ensure null-terminated string
    offset += count;
}

//=============== Strings, variant 2

/// Same contract as appendPaddedString, implemented with std::copy and
/// std::back_inserter to show the alternative.
///
/// Note that back_inserter appends one element at a time, so this variant is
/// not faster than the range insert used by appendPaddedString.
///
/// @param block         Destination byte stream.
/// @param str           Source characters; a null pointer is treated as empty.
/// @param length        Maximum number of characters taken from `str`.
/// @param paddedLength  Minimum size of the field in bytes.
void appendPaddedString2(std::vector<uint8_t>& block, const char* str, size_t length, size_t paddedLength) {
    const size_t len = boundedLength(str, length);
    if (len > 0) {
        // Copy exactly the characters that were counted, never `length` blindly.
        std::copy(str, str + len, std::back_inserter(block));
    }
    appendZeroPadding(block, len, paddedLength);
}

/// Demo: serializes two integers and a string, dumps the byte streams, then
/// parses them back and prints the recovered values.
int main() {
    std::cout << (isLittleEndian() ? " system is little endian" : " system is big endian") << '\n';

    const uint16_t num16 = 0x1234;      // 4660
    const uint32_t num32 = 0x56000078;  // 1442840696

    std::vector<uint8_t> littleEndianBlock;
    appendLittleEndian(littleEndianBlock, &num16, sizeof(num16));
    appendLittleEndian(littleEndianBlock, &num32, sizeof(num32));

    std::vector<uint8_t> bigEndianBlock;
    appendBigEndian(bigEndianBlock, &num16, sizeof(num16));
    appendBigEndian(bigEndianBlock, &num32, sizeof(num32));

    std::cout << "Binary Stream in little-endian format:\n";
    dumpHex(littleEndianBlock);
    std::cout << "Binary Stream in big-endian format:\n";
    dumpHex(bigEndianBlock);

    char szMsg[30] = {0};
    // Leave the last byte untouched so the buffer is always NUL-terminated.
    std::strncpy(szMsg, "Hello World", sizeof(szMsg) - 1);
    // Round the field width up to the next multiple of 4 bytes.
    constexpr size_t paddedLength = (sizeof(szMsg) + 3) / 4 * 4;

    std::vector<uint8_t> strBlock1;
    appendPaddedString(strBlock1, szMsg, std::strlen(szMsg), paddedLength);
    std::cout << "Binary Stream in str1 format. block1 size:" << strBlock1.size() << '\n';
    dumpHex(strBlock1);

    std::vector<uint8_t> strBlock2;
    appendPaddedString2(strBlock2, szMsg, std::strlen(szMsg), paddedLength);
    std::cout << "Binary Stream in str2 format. block2 size:" << strBlock2.size() << '\n';
    dumpHex(strBlock2);

    //============================================ Parse the byte streams ============================================
    size_t offset = 0;
    uint16_t little_a = 0;
    uint32_t little_b = 0;
    parseLittleEndian(littleEndianBlock, offset, little_a);
    parseLittleEndian(littleEndianBlock, offset, little_b);
    std::cout << "offset:" << offset << ",block.size:" << littleEndianBlock.size()
              << ",little_a:" << little_a << ",little_b:" << little_b << '\n';

    size_t offset2 = 0;
    uint16_t big_a = 0;
    uint32_t big_b = 0;
    parseBigEndian(bigEndianBlock, offset2, big_a);
    parseBigEndian(bigEndianBlock, offset2, big_b);
    std::cout << "offset2:" << offset2 << ",block.size:" << bigEndianBlock.size()
              << ",big_a:" << big_a << ",big_b:" << big_b << '\n';

    char szBuff1[100] = {0};
    char szBuff2[100] = {0};
    static_assert(paddedLength < sizeof(szBuff1) && paddedLength < sizeof(szBuff2),
                  "receive buffers need room for the field plus a terminator");
    size_t offset3 = 0;
    size_t offset4 = 0;
    // Parse exactly the field width that was written, not the buffer size.
    parsePaddedString(strBlock1, offset3, szBuff1, paddedLength);
    parsePaddedString(strBlock2, offset4, szBuff2, paddedLength);
    std::cout << "strBlock1.size:" << strBlock1.size() << ",offset3:" << offset3
              << ",strlen(szBuff1):" << std::strlen(szBuff1) << ",szBuff1:" << szBuff1 << '\n';
    std::cout << "strBlock2.size:" << strBlock2.size() << ",offset4:" << offset4
              << ",strlen(szBuff2):" << std::strlen(szBuff2) << ",szBuff2:" << szBuff2 << '\n';
    return 0;
}

/************************************************
Inputs:
    uint16_t num16 = 0x1234;      // 4660
    uint32_t num32 = 0x56000078;  // 1442840696

Output printed on a little-endian host:

 system is little endian
Binary Stream in little-endian format:
34 12 78 00 00 56
Binary Stream in big-endian format:
12 34 56 00 00 78
Binary Stream in str1 format. block1 size:32
48 65 6c 6c 6f 20 57 6f 72 6c 64 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
Binary Stream in str2 format. block2 size:32
48 65 6c 6c 6f 20 57 6f 72 6c 64 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
offset:6,block.size:6,little_a:4660,little_b:1442840696
offset2:6,block.size:6,big_a:4660,big_b:1442840696
strBlock1.size:32,offset3:32,strlen(szBuff1):11,szBuff1:Hello World
strBlock2.size:32,offset4:32,strlen(szBuff2):11,szBuff2:Hello World
**************************************************/