Kmalloc代碼閱讀記錄
Base Linux RC5.0
SLOB SLAB SLUB
/*
 * kmalloc - allocate kernel memory.
 * @size: number of bytes requested
 * @flags: GFP allocation flags
 *
 * Fast path for compile-time-constant sizes: requests above
 * KMALLOC_MAX_CACHE_SIZE go to kmalloc_large(); otherwise (non-SLOB)
 * the size is mapped to a fixed kmalloc cache index and served from
 * that cache.  Runtime-variable sizes fall through to __kmalloc().
 */
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
if (__builtin_constant_p(size)) {
#ifndef CONFIG_SLOB
unsigned int index;
#endif
/* Too big for any kmalloc cache: hand off to the page allocator. */
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large(size, flags);
#ifndef CONFIG_SLOB
/* Log2-style cache index; 0 means a zero-byte request. */
index = kmalloc_index(size);
if (!index)
return ZERO_SIZE_PTR;
/*
 * kmalloc_type(flags) presumably selects the cache family
 * (e.g. normal vs DMA/reclaim) from the flags — confirm in
 * mm/slab_common.c.
 */
return kmem_cache_alloc_trace(
kmalloc_caches[kmalloc_type(flags)][index],
flags, size);
#endif
}
/* Non-constant size (or SLOB constant path): generic implementation. */
return __kmalloc(size, flags);
}
kmalloc分兩個路徑
靜態size
如果size是編譯期常量(__builtin_constant_p(size)爲真), 編譯器可在編譯時做常量優化
size > MAX CACHE ----> kmalloc_large
size <=MAX CACHE ----> kmalloc_index
KMALLOC_MAX_CACHE_SIZE 在slab頭文件定義: 沒有定義CONFIG_FORCE_MAX_ZONEORDER時爲 MAX_ORDER(11)+PAGE_SHIFT(12或13)-1 = 22或23, 且上限爲25 (2的25次方 = 32M)。 25定義:32M, 23定義:8M, 22定義:4M
KMALLOC_MAX_CACHE_SIZE 在slub定義 PAGE_SHIFT 12(13) + 1 SLUB直接分配適合order-1頁面的請求(頁大小* 2)。較大的請求被傳遞給頁面分配器 13定義:16k 12定義:8k
SLUB 流程
< MAX CACHE
kmalloc_index: 取對數的index
kmem_cache_alloc_trace -> slab_alloc -> slab_alloc_node
-> slab_alloc_node -> __slab_alloc or object = c->freelist
大體流程是 如果cpu slab裏面有free object就從cpu slab取一個即可 如果沒有則調用__slab_alloc
/*
 * NOTE(review): excerpt of the slab_alloc_node() fast path (mm/slub.c).
 * The enclosing function signature, the `redo:` label and the `tid`
 * variable are outside this snippet.
 */
object = c->freelist;
page = c->page;
/* No free object on the cpu slab (or wrong NUMA node): slow path. */
if (unlikely(!object || !node_match(page, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c);
stat(s, ALLOC_SLOWPATH);
} else {
void *next_object = get_freepointer_safe(s, object);
/*
 * The cmpxchg will only match if there was no additional
 * operation and if we are on the right processor.
 *
 * The cmpxchg does the following atomically (without lock
 * semantics!)
 * 1. Relocate first pointer to the current per cpu area.
 * 2. Verify that tid and freelist have not been changed
 * 3. If they were not changed replace tid and freelist
 *
 * Since this is without lock semantics the protection is only
 * against code executing on this cpu *not* from access by
 * other cpus.
 */
if (unlikely(!this_cpu_cmpxchg_double(
s->cpu_slab->freelist, s->cpu_slab->tid,
object, tid,
next_object, next_tid(tid)))) {
/* Lost the race (preempted/migrated): retry from `redo:`. */
note_cmpxchg_failure("slab_alloc", s, tid);
goto redo;
}
prefetch_freepointer(s, next_object);
stat(s, ALLOC_FASTPATH);
}
/* __GFP_ZERO: zero the object before handing it out. */
if (unlikely(gfpflags & __GFP_ZERO) && object)
memset(object, 0, s->object_size);
slab_post_alloc_hook(s, gfpflags, 1, &object);
return object;
kmalloc_large:
kmalloc_large -> kmalloc_order_trace
static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
{
unsigned int order = get_order(size);
/*取size的最高位,對數的尺度, 查找所需的最小分配粒度
* 0 -> 2^0 * PAGE_SIZE及以下
* 1 -> 2^1 * PAGE_SIZE到2^0 * PAGE_SIZE + 1
* 2 -> 2^2 * PAGE_SIZE到2^1 * PAGE_SIZE + 1
* 3 -> 2^3 * PAGE_SIZE到2^2 * PAGE_SIZE + 1
* 4 -> 2^4 * PAGE_SIZE到2^3 * PAGE_SIZE + 1*
/
return kmalloc_order_trace(size, flags, order);
}
/*
 * kmalloc_order_trace - kmalloc_order() plus a kmalloc tracepoint.
 *
 * The traced allocated size is the full span of the order, i.e.
 * PAGE_SIZE << order, not just the requested @size.
 */
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
	void *obj = kmalloc_order(size, flags, order);

	trace_kmalloc(_RET_IP_, obj, size, PAGE_SIZE << order, flags);
	return obj;
}
/*
 * kmalloc_order - backing implementation for large kmalloc requests.
 *
 * To avoid unnecessary overhead, large allocation requests are passed
 * directly to the page allocator instead of going through a slab cache.
 * Returns the mapped address of the pages, or NULL on failure.
 */
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
	struct page *page;
	void *addr;

	/* __GFP_COMP: presumably allocates a compound page — confirm. */
	flags |= __GFP_COMP;
	page = alloc_pages(flags, order);
	addr = page ? page_address(page) : NULL;

	kmemleak_alloc(addr, size, 1, flags);
	addr = kasan_kmalloc_large(addr, size, flags);
	return addr;
}
動態size
如果動態size, slab, slub, slob 裏面調用的函數都不一樣
SLAB __kmalloc實現
__kmalloc —> __do_kmalloc
/*
 * __do_kmalloc - SLAB back end for runtime-sized kmalloc() calls.
 * @size: number of bytes requested
 * @flags: GFP allocation flags
 * @caller: return address, for tracing
 *
 * Unlike SLUB, SLAB has no page-allocator fallback here: requests above
 * KMALLOC_MAX_CACHE_SIZE simply fail with NULL.
 */
static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
unsigned long caller)
{
	struct kmem_cache *cache;
	void *obj;

	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
		return NULL;

	cache = kmalloc_slab(size, flags);
	if (unlikely(ZERO_OR_NULL_PTR(cache)))
		return cache;

	obj = slab_alloc(cache, flags, caller);
	obj = kasan_kmalloc(cache, obj, size, flags);
	trace_kmalloc(caller, obj, size, cache->size, flags);
	return obj;
}
在slab的頭文件對MAX CACHE有定義, 如果使用slab分配器的,那麼最大不能超過32M 超過KMALLOC_MAX_CACHE_SIZE大小後就return NULL
#ifdef CONFIG_SLAB
/*
* The largest kmalloc size supported by the SLAB allocators is
* 32 megabyte (2^25) or the maximum allocatable page order if that is
* less than 32 MB.
SLUB __kmalloc實現
slub 分配__kmalloc 實現, 超過KMALLOC_MAX_CACHE_SIZE, 直接調用kmalloc_large
如果沒超過, 調用slab_alloc(內部走slab_alloc_node), 與上面的靜態size路徑類似
/*
 * __kmalloc - SLUB back end for runtime-sized kmalloc() calls.
 *
 * Requests above KMALLOC_MAX_CACHE_SIZE bypass the slab layer through
 * kmalloc_large(); everything else is served from the matching kmalloc
 * cache.
 */
void *__kmalloc(size_t size, gfp_t flags)
{
	struct kmem_cache *cache;
	void *obj;

	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
		return kmalloc_large(size, flags);

	cache = kmalloc_slab(size, flags);
	if (unlikely(ZERO_OR_NULL_PTR(cache)))
		return cache;

	obj = slab_alloc(cache, flags, _RET_IP_);
	trace_kmalloc(_RET_IP_, obj, size, cache->size, flags);
	obj = kasan_kmalloc(cache, obj, size, flags);
	return obj;
}