/*
 * AllignedMalloc - allocate memory aligned to an arbitrary power-of-two
 * boundary.
 *
 * size:    number of usable bytes requested by the caller.
 * aligned: required alignment in bytes; must be a positive power of two.
 *
 * Returns a pointer whose address is a multiple of `aligned`, or NULL when
 * `aligned` is not positive, the total size would overflow, or malloc fails.
 *
 * The pointer obtained from malloc is saved in the void * slot directly in
 * front of the returned address; AlignedFree reads it back from there, so
 * the result must be released with AlignedFree and never with free().
 */
void *AllignedMalloc(size_t size, int aligned)
{
    /* Zero would turn the mask below into 0 and collapse every address to
     * NULL; negative values (INT_MIN in particular) would overflow in the
     * power-of-two test. */
    if (aligned <= 0) {
        return NULL;
    }
    /* aligned is a power of 2 */
    assert((aligned & (aligned - 1)) == 0);

    size_t alignment = (size_t)aligned;
    /* Room for the saved pointer plus worst-case padding. */
    size_t overhead = sizeof(void *) + alignment;
    if (size > (size_t)-1 - overhead) {
        return NULL; /* overhead + size would wrap around */
    }

    /* Allocate the raw memory. */
    void *data = malloc(overhead + size);
    if (data == NULL) {
        return NULL;
    }

    /* Skip one pointer slot, then round up to the requested boundary.
     * Stepping a void ** advances by sizeof(void *), and since malloc's
     * result is suitably aligned for a pointer, the slot in front of the
     * rounded address is too. */
    void **temp = (void **)data + 1;
    void **alignedData =
        (void **)(((size_t)temp + alignment - 1) & ~(alignment - 1));

    /* Remember the original address for AlignedFree. */
    alignedData[-1] = data;
    return alignedData; /* implicitly converted to void * */
}
/*
 * AlignedFree - release a block obtained from AllignedMalloc.
 *
 * data: pointer previously returned by AllignedMalloc, or NULL (ignored).
 *
 * The void * slot directly in front of `data` holds the address that malloc
 * originally returned; that address is what gets handed to free().
 */
void AlignedFree(void *data)
{
    /* The guard is required: a NULL `data` has no slot in front of it. */
    if (data == NULL) {
        return;
    }

    void **slot = (void **)data - 1;
    free(*slot);
}
這裏以32位系統,16字節對齊爲例,用示意圖表示更加清楚一點。
原始分配內存圖
下面考慮兩種情況:
這裏要注意轉爲二級指針後:
1. 指向的內存區域是指針變量,存放的是地址,即編譯器對內存的解析發生變化
2. 對void **進行自增++,移動的是一個指針變量的大小
在mmx,sse優化的時候經常要求內存按8,16字節對齊。但是默認的編譯器一般都是8字節對齊。所以需要在分配內存的時候,能按16或者其他字節對齊。
以下是從xvid工程中找到的任意字節對齊的內存分配函數。
/*****************************************************************************
 * align_malloc
 *
 * Allocates 'size' bytes (usable by the caller) on the heap, starting at an
 * address that is a multiple of 'alignment'. An 'alignment' of 0 is treated
 * as 4. Any non-zero alignment is accepted, not only powers of two.
 *
 * Internally sizeof(void *) + 'alignment' + 'size' bytes are requested from
 * malloc, so try to keep the alignment small when allocating small pieces of
 * memory. The pointer returned by malloc is saved in the sizeof(void *)
 * bytes immediately in front of the aligned block so align_free can find it.
 *
 * NB : a block allocated by align_malloc _must_ be freed with align_free;
 *      handing it to the libc free is an error.
 *
 * Returned value : - NULL on error (size overflow or malloc failure)
 *                  - Pointer to the allocated aligned block
 *
 ****************************************************************************/
void *align_malloc(unsigned int size, unsigned int alignment)
{
    const size_t header = sizeof(unsigned char *);
    unsigned char *raw;
    unsigned char *mem_ptr;
    const unsigned char *src;
    size_t total;
    size_t misalign;
    size_t i;

    if (!alignment) {
        alignment = 4; /* align to at least 4 bytes */
    }

    /* header + worst-case padding + payload, with wrap-around checks */
    total = header;
    if ((size_t)alignment > (size_t)-1 - total) {
        return NULL;
    }
    total += alignment;
    if ((size_t)size > (size_t)-1 - total) {
        return NULL;
    }
    total += size;

    raw = malloc(total);
    if (raw == NULL) {
        return NULL;
    }

    /* Leave room for the header, then round up to the next multiple of
     * 'alignment'. The address is handled as size_t so no bits are lost on
     * platforms where pointers are wider than unsigned int. */
    mem_ptr = raw + header;
    misalign = (size_t)mem_ptr % alignment;
    if (misalign != 0) {
        mem_ptr += alignment - misalign;
    }

    /* Save the malloc pointer in front of the aligned block. It is copied
     * byte by byte because, for alignments that are not a multiple of
     * sizeof(void *), the header position need not be aligned for a
     * pointer store. */
    src = (const unsigned char *)&raw;
    for (i = 0; i < header; i++) {
        (mem_ptr - header)[i] = src[i];
    }

    /* Return the aligned pointer */
    return mem_ptr;
}

/*****************************************************************************
 * align_free
 *
 * Frees a block previously allocated by align_malloc. Does nothing when
 * given NULL.
 *
 * Returned value : None.
 *
 ****************************************************************************/
void align_free(void *mem_ptr)
{
    unsigned char *raw;
    unsigned char *dst;
    const unsigned char *hdr;
    size_t i;

    if (mem_ptr == NULL) {
        return;
    }

    /* The bytes just in front of the aligned block hold the pointer that
     * malloc returned; read it back the same way it was written. */
    hdr = (const unsigned char *)mem_ptr - sizeof raw;
    dst = (unsigned char *)&raw;
    for (i = 0; i < sizeof raw; i++) {
        dst[i] = hdr[i];
    }

    /* Free the memory */
    free(raw);
}
這個任意字節內存對齊的題目在筆試中已經遇到兩次,然而卻還是不會。在找工作中這個題目很能體現一個人的基礎水平,注意學習。
/* Round d up to the next multiple of a. a must be a power of two and is
 * evaluated twice, so it should be free of side effects. Both parameters
 * are parenthesized so that expressions such as `1 << 6` expand correctly. */
#define ngx_align(d, a) (((d) + ((a) - 1)) & ~((a) - 1))
#include <stdio.h>
/* Loop counter shared by both tables printed in main. */
int i;

/*
 * Prints two tables, headed "64" and "128". Each lists every value from 1
 * to 256 next to the result of ngx_align for that boundary. A line break
 * follows each value whose remainder modulo 8 is 7, so the first row holds
 * seven entries and the following rows eight.
 */
int main()
{
    printf("64\n");
    i = 1;
    while (i <= 256) {
        printf("%3d %3d,", i, ngx_align(i, 64));
        if (i % 8 == 7) {
            printf("\n");
        }
        i++;
    }
    printf("\n");

    printf("128\n");
    i = 1;
    while (i <= 256) {
        printf("%3d %3d,", i, ngx_align(i, 128));
        if (i % 8 == 7) {
            printf("\n");
        }
        i++;
    }
    printf("\n");

    return 0;
}
//64
// 1 64, 2 64, 3 64, 4 64, 5 64, 6 64, 7 64,
// 8 64, 9 64, 10 64, 11 64, 12 64, 13 64, 14 64, 15 64,
// 16 64, 17 64, 18 64, 19 64, 20 64, 21 64, 22 64, 23 64,
// 24 64, 25 64, 26 64, 27 64, 28 64, 29 64, 30 64, 31 64,
// 32 64, 33 64, 34 64, 35 64, 36 64, 37 64, 38 64, 39 64,
// 40 64, 41 64, 42 64, 43 64, 44 64, 45 64, 46 64, 47 64,
// 48 64, 49 64, 50 64, 51 64, 52 64, 53 64, 54 64, 55 64,
// 56 64, 57 64, 58 64, 59 64, 60 64, 61 64, 62 64, 63 64,
// 64 64, 65 128, 66 128, 67 128, 68 128, 69 128, 70 128, 71 128,
// 72 128, 73 128, 74 128, 75 128, 76 128, 77 128, 78 128, 79 128,
// 80 128, 81 128, 82 128, 83 128, 84 128, 85 128, 86 128, 87 128,
// 88 128, 89 128, 90 128, 91 128, 92 128, 93 128, 94 128, 95 128,
// 96 128, 97 128, 98 128, 99 128,100 128,101 128,102 128,103 128,
//104 128,105 128,106 128,107 128,108 128,109 128,110 128,111 128,
//112 128,113 128,114 128,115 128,116 128,117 128,118 128,119 128,
//120 128,121 128,122 128,123 128,124 128,125 128,126 128,127 128,
//128 128,129 192,130 192,131 192,132 192,133 192,134 192,135 192,
//136 192,137 192,138 192,139 192,140 192,141 192,142 192,143 192,
//144 192,145 192,146 192,147 192,148 192,149 192,150 192,151 192,
//152 192,153 192,154 192,155 192,156 192,157 192,158 192,159 192,
//160 192,161 192,162 192,163 192,164 192,165 192,166 192,167 192,
//168 192,169 192,170 192,171 192,172 192,173 192,174 192,175 192,
//176 192,177 192,178 192,179 192,180 192,181 192,182 192,183 192,
//184 192,185 192,186 192,187 192,188 192,189 192,190 192,191 192,
//192 192,193 256,194 256,195 256,196 256,197 256,198 256,199 256,
//200 256,201 256,202 256,203 256,204 256,205 256,206 256,207 256,
//208 256,209 256,210 256,211 256,212 256,213 256,214 256,215 256,
//216 256,217 256,218 256,219 256,220 256,221 256,222 256,223 256,
//224 256,225 256,226 256,227 256,228 256,229 256,230 256,231 256,
//232 256,233 256,234 256,235 256,236 256,237 256,238 256,239 256,
//240 256,241 256,242 256,243 256,244 256,245 256,246 256,247 256,
//248 256,249 256,250 256,251 256,252 256,253 256,254 256,255 256,
//256 256,
//128
// 1 128, 2 128, 3 128, 4 128, 5 128, 6 128, 7 128,
// 8 128, 9 128, 10 128, 11 128, 12 128, 13 128, 14 128, 15 128,
// 16 128, 17 128, 18 128, 19 128, 20 128, 21 128, 22 128, 23 128,
// 24 128, 25 128, 26 128, 27 128, 28 128, 29 128, 30 128, 31 128,
// 32 128, 33 128, 34 128, 35 128, 36 128, 37 128, 38 128, 39 128,
// 40 128, 41 128, 42 128, 43 128, 44 128, 45 128, 46 128, 47 128,
// 48 128, 49 128, 50 128, 51 128, 52 128, 53 128, 54 128, 55 128,
// 56 128, 57 128, 58 128, 59 128, 60 128, 61 128, 62 128, 63 128,
// 64 128, 65 128, 66 128, 67 128, 68 128, 69 128, 70 128, 71 128,
// 72 128, 73 128, 74 128, 75 128, 76 128, 77 128, 78 128, 79 128,
// 80 128, 81 128, 82 128, 83 128, 84 128, 85 128, 86 128, 87 128,
// 88 128, 89 128, 90 128, 91 128, 92 128, 93 128, 94 128, 95 128,
// 96 128, 97 128, 98 128, 99 128,100 128,101 128,102 128,103 128,
//104 128,105 128,106 128,107 128,108 128,109 128,110 128,111 128,
//112 128,113 128,114 128,115 128,116 128,117 128,118 128,119 128,
//120 128,121 128,122 128,123 128,124 128,125 128,126 128,127 128,
//128 128,129 256,130 256,131 256,132 256,133 256,134 256,135 256,
//136 256,137 256,138 256,139 256,140 256,141 256,142 256,143 256,
//144 256,145 256,146 256,147 256,148 256,149 256,150 256,151 256,
//152 256,153 256,154 256,155 256,156 256,157 256,158 256,159 256,
//160 256,161 256,162 256,163 256,164 256,165 256,166 256,167 256,
//168 256,169 256,170 256,171 256,172 256,173 256,174 256,175 256,
//176 256,177 256,178 256,179 256,180 256,181 256,182 256,183 256,
//184 256,185 256,186 256,187 256,188 256,189 256,190 256,191 256,
//192 256,193 256,194 256,195 256,196 256,197 256,198 256,199 256,
//200 256,201 256,202 256,203 256,204 256,205 256,206 256,207 256,
//208 256,209 256,210 256,211 256,212 256,213 256,214 256,215 256,
//216 256,217 256,218 256,219 256,220 256,221 256,222 256,223 256,
//224 256,225 256,226 256,227 256,228 256,229 256,230 256,231 256,
//232 256,233 256,234 256,235 256,236 256,237 256,238 256,239 256,
//240 256,241 256,242 256,243 256,244 256,245 256,246 256,247 256,
//248 256,249 256,250 256,251 256,252 256,253 256,254 256,255 256,
//256 256,
//
類似於取模,只不過它得到的是把d向上取整到a的整數倍之後的值,而不是餘數。
/* Round d up to the next multiple of a. a must be a power of two and is
 * evaluated twice, so it should be free of side effects. Both parameters
 * are parenthesized so that expressions such as `1 << 6` expand correctly. */
#define ngx_align(d, a) (((d) + ((a) - 1)) & ~((a) - 1))
1)先看~(a-1)的含義
假設a是8
對應二進制是0000 1000
(a-1)對應的就是
0000 0111
~(a-1)對應的是 1111 1000
任何一個數與~(a-1)按位相與,就是把低三位置0,也就是把該數變成8的倍數
對應題目中的就是 64、128的倍數 也就形成了對齊
2)對於d + (a-1)的操作目的就是
eg:70以64對齊,結果是128而不是64,區別就在這裏
左半部分 d+a-1 保證處理後的值除以a得到的商符合要求(如果d%a=0,商不變,否則商+1)
右半部分&~(a-1)相當於把餘數抹去。
這裏,對a有要求,a必須是2的k次冪,也就是a=1,2,4,8,16....
內存的對齊問題,目的是爲了加快IO效率。