slab對象的分配:
slab對象的分配使用kmem_cache_alloc():
/*
 * kmem_cache_alloc - public entry point for obtaining an object from the
 * cache described by @cachep.
 *
 * @cachep: cache descriptor the object is taken from.
 * @flags:  gfp flags forwarded unchanged to slab_alloc().
 *
 * The allocation itself is delegated to slab_alloc(); this wrapper only adds
 * the trace event. Returns whatever slab_alloc() returned.
 */
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
void *ret = slab_alloc(cachep, flags, _RET_IP_);
/* Trace the result together with the cache's object_size, size and the flags. */
trace_kmem_cache_alloc(_RET_IP_, ret,
cachep->object_size, cachep->size, flags);
return ret;
}
實際執行函數爲slab_alloc()->__do_cache_alloc()->____cache_alloc()
/*
 * ____cache_alloc - try to satisfy an allocation from the array_cache
 * returned by cpu_cache_get(), falling back to cache_alloc_refill().
 *
 * Fast path: when ac->avail is non-zero an object is taken with
 * ac_get_obj(). If that yields NULL, force_refill is set and the slow path
 * below is taken with it.
 * Slow path: cache_alloc_refill() supplies the object (or NULL).
 *
 * Returns the object pointer, or NULL when no object could be obtained.
 * check_irq_off() is called on entry, i.e. the caller is expected to run
 * with interrupts disabled.
 */
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
void *objp;
struct array_cache *ac;
bool force_refill = false;
check_irq_off();
ac = cpu_cache_get(cachep);---------------(1)
if (likely(ac->avail)) {------------------(2)
/* Record activity; cache_alloc_refill() reads this to size its refill. */
ac->touched = 1;
objp = ac_get_obj(cachep, ac, flags, false);------------(3)
/*
* Allow for the possibility all avail objects are not allowed
* by the current flags
*/
if (objp) {
STATS_INC_ALLOCHIT(cachep);
goto out;
}
/* Objects were available but none was returned: force the refill path. */
force_refill = true;----------------(4)
}
STATS_INC_ALLOCMISS(cachep);
objp = cache_alloc_refill(cachep, flags, force_refill);--------------(5)
/*
* the 'ac' may be updated by cache_alloc_refill(),
* and kmemleak_erase() requires its correct value.
*/
ac = cpu_cache_get(cachep);
out:
/*
* To avoid a false negative, if an object that is in one of the
* per-CPU caches is leaked, we need to make sure kmemleak doesn't
* treat the array pointers as a reference to the object.
*/
if (objp)
kmemleak_erase(&ac->entry[ac->avail]);
return objp;
}
(1)獲取array_cache結構體,由於之前初始化slab描述符時已經初始化完畢,故肯定能獲取到。
(2)如果array_cache的avail值不爲0表示當前cpu本地緩衝池中有空閒對象,則調用ac_get_obj獲取slab對象。
(3)獲取slab對象:
/*
 * ac_get_obj - take one object pointer out of the array_cache @ac.
 *
 * When sk_memalloc_socks() is true the work is delegated to __ac_get_obj()
 * (defined elsewhere), which also receives @flags and @force_refill.
 * Otherwise the object is popped straight off ac->entry[] and @cachep,
 * @flags and @force_refill are not used.
 *
 * Returns the object pointer taken from the array.
 */
static inline void *ac_get_obj(struct kmem_cache *cachep,
struct array_cache *ac, gfp_t flags, bool force_refill)
{
void *objp;
if (unlikely(sk_memalloc_socks()))
objp = __ac_get_obj(cachep, ac, flags, force_refill);
else
objp = ac->entry[--ac->avail];// avail is decremented first, so this pops the last occupied slot of entry[]
return objp;
}
(4)如果ac_get_obj()沒有分配成功(返回NULL),則將force_refill置爲true,隨後進入cache_alloc_refill()重新填充對象緩衝池
(5)如果array_cache不存在空閒對象,則直接調用核心函數cache_alloc_refill()獲取slab對象。
/*
 * cache_alloc_refill - refill the array_cache returned by cpu_cache_get()
 * and hand out one object from it.
 *
 * @cachep:       cache descriptor being allocated from.
 * @flags:        gfp flags, passed to cache_grow() and ac_get_obj().
 * @force_refill: when true, skip the list scan on entry and jump straight
 *                to growing the cache.
 *
 * Sources tried, in order, while holding n->list_lock:
 *   1. the node's shared array, via transfer_objects();
 *   2. slabs on n->slabs_partial, then on n->slabs_free.
 * If the array_cache is still empty afterwards, cache_grow() is called and,
 * on success with ac->avail still zero, the whole sequence is retried.
 *
 * Returns NULL when cache_grow() fails and either the array_cache is empty
 * or force_refill is set; otherwise returns the result of ac_get_obj().
 */
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
bool force_refill)
{
int batchcount;
struct kmem_cache_node *n;
struct array_cache *ac;
int node;
check_irq_off();
node = numa_mem_id();
if (unlikely(force_refill))
goto force_grow;
retry:
ac = cpu_cache_get(cachep);--------------(1)
batchcount = ac->batchcount;
if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
/*
* If there was little recent activity on this cache, then
* perform only a partial refill. Otherwise we could generate
* refill bouncing.
*/
batchcount = BATCHREFILL_LIMIT;
}
n = get_node(cachep, node);--------------(2)
/* A refill is only expected with an empty array_cache and a valid node. */
BUG_ON(ac->avail > 0 || !n);
spin_lock(&n->list_lock);
/* See if we can refill from the shared array */
if (n->shared && transfer_objects(ac, n->shared, batchcount)) {---------(3)
n->shared->touched = 1;
goto alloc_done;
}
while (batchcount > 0) {
struct list_head *entry;
struct page *page;
/* Get slab alloc is to come from. */
entry = n->slabs_partial.next;----------------(4)
/* Partial list empty: fall back to the free list, or give up and grow. */
if (entry == &n->slabs_partial) {
n->free_touched = 1;
entry = n->slabs_free.next;
if (entry == &n->slabs_free)
goto must_grow;
}
page = list_entry(entry, struct page, lru);--------------(5)
check_spinlock_acquired(cachep);
/*
* The slab was either on partial or free list so
* there must be at least one object available for
* allocation.
*/
BUG_ON(page->active >= cachep->num);------------(6)
/* Move objects until this slab is exhausted or the batch is complete. */
while (page->active < cachep->num && batchcount--) {------------(7)
STATS_INC_ALLOCED(cachep);
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
ac_put_obj(cachep, ac, slab_get_obj(cachep, page,
node));---------------(8)
}
/* move slabp to correct slabp list: */
list_del(&page->lru);-------------------------(9)
if (page->active == cachep->num)-----------------(10)
list_add(&page->lru, &n->slabs_full);
else
list_add(&page->lru, &n->slabs_partial);
}
must_grow:
/* Account for the objects that were moved into the array_cache above. */
n->free_objects -= ac->avail;
alloc_done:
spin_unlock(&n->list_lock);
if (unlikely(!ac->avail)) {
int x;
force_grow:
x = cache_grow(cachep, gfp_exact_node(flags), node, NULL);--------------(11)
/* cache_grow can reenable interrupts, then ac could change. */
ac = cpu_cache_get(cachep);
node = numa_mem_id();
/* no objects in sight? abort */
if (!x && (ac->avail == 0 || force_refill))
return NULL;
if (!ac->avail) /* objects refilled by interrupt? */-------------(12)
goto retry;
}
ac->touched = 1;
return ac_get_obj(cachep, ac, flags, force_refill);----------------(13)
}
(1)獲取本CPU的本地對象緩衝池描述符
(2)獲取slab節點描述符,前面已經初始化好
(3)如果slab節點的shared共享緩衝池有初始化,並且從中獲取batchcount個slab對象成功,則goto alloc_done.
(4)如果共享緩衝池分配失敗,則進入while循環,先後判斷slab節點的slabs_partial和slabs_free節點是否爲空,如果爲空,則goto must_grow
(5)如果slab partial或者free不爲空,則使用list_entry先獲取當前鏈表中的lru對應的page結構體
(6)如果page->active大於或等於slab描述符中每個slab的最大對象個數(cachep->num),說明該slab上已沒有空閒對象,觸發BUG_ON報錯
(7)關鍵操作,通過while循環,從當前首個頁面爲page的緩衝池中取出最多batchcount個slab對象添加到CPU的本地對象緩衝池
(8)從緩衝池中獲取一個slab對象,添加到CPU本地對象緩衝池,此時page->active會加1,此時如果出現active=num且batchcount>0,則需要重新開始尋找可以分配slab對象的partial或者free鏈表,完成將ac->batchcount遷移到CPU本地對象緩衝池中
(9)將page從原鏈表中刪除
(10)判斷當前page的active是否等於slab的num,如果等於則將page移動到full鏈表,否則就移動到partial中
(11)如果partial和free鏈表都爲空,則說明沒有空閒的slab對象,需要重新創建一個對象緩衝池
(12)cache_grow函數只是重新分配了slab對象緩衝池,然後將其添加到slabs_free鏈表,此處avail仍然爲0,需要retry,此時會添加batchcount個slab對象到CPU本地對象緩衝池中,然後avail不等於0。
(13)從CPU本地對象緩衝池中獲取一個slab對象,完成slab對象的分配。
下面來看cache_grow()函數:
/*
 * cache_grow - add one new slab to @cachep on node @nodeid.
 *
 * @cachep: cache descriptor to grow.
 * @flags:  gfp flags; any bit in GFP_SLAB_BUG_MASK triggers BUG().
 * @nodeid: node whose kmem_cache_node receives the new slab.
 * @page:   optional pre-allocated page; when NULL, kmem_getpages() is used.
 *
 * Entered with interrupts off (check_irq_off()). If the flags allow
 * blocking, interrupts are enabled around the page and management
 * allocation and disabled again before returning, on success and failure
 * alike.
 *
 * Returns 1 when the slab was added to n->slabs_free, 0 on failure.
 */
static int cache_grow(struct kmem_cache *cachep,
gfp_t flags, int nodeid, struct page *page)
{
void *freelist;
size_t offset;
gfp_t local_flags;
struct kmem_cache_node *n;
/*
* Be lazy and only check for valid flags here, keeping it out of the
* critical path in kmem_cache_alloc().
*/
if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
pr_emerg("gfp: %u\n", flags & GFP_SLAB_BUG_MASK);
BUG();
}
local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
/* Take the node list lock to change the colour_next on this node */
check_irq_off();
n = get_node(cachep, nodeid);
spin_lock(&n->list_lock);
/* Get colour for the slab, and cal the next value. */
offset = n->colour_next;------------------(1)
n->colour_next++;
/* Wrap the colour index once it reaches cachep->colour. */
if (n->colour_next >= cachep->colour)
n->colour_next = 0;
spin_unlock(&n->list_lock);
/* Scale the colour index by colour_off to get the offset passed on below. */
offset *= cachep->colour_off;
if (gfpflags_allow_blocking(local_flags))
local_irq_enable();
/*
* The test for missing atomic flag is performed here, rather than
* the more obvious place, simply to reduce the critical path length
* in kmem_cache_alloc(). If a caller is seriously mis-behaving they
* will eventually be caught here (where it matters).
*/
kmem_flagcheck(cachep, flags);
/*
* Get mem for the objs. Attempt to allocate a physical page from
* 'nodeid'.
*/
if (!page)
page = kmem_getpages(cachep, local_flags, nodeid);-----------(2)
if (!page)
goto failed;
/* Get slab management. */
freelist = alloc_slabmgmt(cachep, page, offset,
local_flags & ~GFP_CONSTRAINT_MASK, nodeid);---------(3)
if (!freelist)
goto opps1;
slab_map_pages(cachep, page, freelist);---------------(4)
cache_init_objs(cachep, page);----------------(5)
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
check_irq_off();
spin_lock(&n->list_lock);
/* Make slab active. */
list_add_tail(&page->lru, &(n->slabs_free));---------------(6)
STATS_INC_GROWN(cachep);
/* The new slab contributes cachep->num free objects to the node. */
n->free_objects += cachep->num;-----------------------(7)
spin_unlock(&n->list_lock);
return 1;
/* Management allocation failed: give the page back before bailing out. */
opps1:
kmem_freepages(cachep, page);
failed:
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
return 0;
}
(1)colour_next表示下一個slab節點應該包含的colour數目,cache_colour從0開始增加,每個slab加1直到達到這個slab的最大值cachep->colour,然後再從0開始計算。
(2)爲slab對象緩衝池分配2^gfporder個page
(3)初始化freelist,page->active,page->s_mem,freelist爲slab對象緩衝池的首個page的虛擬地址+colour_next得到,page->active初始化爲0,page->s_mem爲首個page的虛擬地址+colour_next+cachep->freelist_size
(4)初始化page->slab_cache=cachep, page->freelist=freelist
(5)初始化obj的狀態爲OBJECT_FREE以及page->freelist[]數組
(6)將新申請的slab對象緩衝池添加到slab node的slabs_free上
(7)slab node的總的free_objects數量加上cachep->num個數.
以上爲slab對象分配的全過程,講到這裏可以畫出slab的整體的框架圖如下:
以上以全局slab_cache鏈表的一個節點kmalloc-64爲例介紹了一個slab節點各內存塊的分佈情況,其他節點以此類推,同時需要說明的是slab_cache全局鏈表上面是所有的通用的(如kmalloc-xxx)以及專用的(比如驅動專門申請命名的slab)slab描述符鏈接在一起組成的鏈表。