前面已經講了slab描述符的初始化以及slab對象的創建,下面是slab對象的回收:
slab對象的回收通過kmem_cache_free來完成:
/**
 * kmem_cache_free - hand an object back to its slab cache
 * @cachep: cache the caller believes the object belongs to
 * @objp: object being released
 *
 * The cache pointer is first re-resolved through cache_from_obj(); if that
 * yields NULL the function returns without freeing anything. The debug
 * hooks and __cache_free() run between local_irq_save() and
 * local_irq_restore(); the tracepoint fires after the restore.
 */
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
{
	unsigned long irq_state;

	cachep = cache_from_obj(cachep, objp);
	if (!cachep)
		return;

	local_irq_save(irq_state);

	debug_check_no_locks_freed(objp, cachep->object_size);
	/* The object-free check is skipped for SLAB_DEBUG_OBJECTS caches. */
	if ((cachep->flags & SLAB_DEBUG_OBJECTS) == 0)
		debug_check_no_obj_freed(objp, cachep->object_size);
	__cache_free(cachep, objp, _RET_IP_);

	local_irq_restore(irq_state);

	trace_kmem_cache_free(_RET_IP_, objp);
}
kmem_cache_free()->__cache_free()
/*
 * Free path behind kmem_cache_free(): put @objp back into @cachep.
 * @caller is forwarded to cache_free_debugcheck().
 *
 * Order of events, as written below:
 *  - run the kmemleak / debugcheck / kmemcheck hooks;
 *  - with more than one node online, let cache_free_alien() try to take
 *    the object, and return early if it reports success (non-zero);
 *  - otherwise store the object in the array_cache obtained from
 *    cpu_cache_get(), calling cache_flusharray() first when that array
 *    is already at its limit.
 *
 * NOTE(review): check_irq_off() suggests interrupts must be disabled here;
 * kmem_cache_free() calls this under local_irq_save() - confirm for any
 * other caller.
 */
static inline void __cache_free(struct kmem_cache *cachep, void *objp,
unsigned long caller)
{
struct array_cache *ac = cpu_cache_get(cachep);
check_irq_off();
kmemleak_free_recursive(objp, cachep->flags);
/* From here on objp is whatever cache_free_debugcheck() returned. */
objp = cache_free_debugcheck(cachep, objp, caller);
kmemcheck_slab_free(cachep, objp, cachep->object_size);
/*
* Skip calling cache_free_alien() when the platform is not numa.
* This will avoid cache misses that happen while accessing slabp (which
* is per page memory reference) to get nodeid. Instead use a global
* variable to skip the call, which is mostly likely to be present in
* the cache.
*/
if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
return;
if (ac->avail < ac->limit) {
/* The array cache still has room: account a free hit. */
STATS_INC_FREEHIT(cachep);
} else {
/* The array cache is full: account a miss and flush entries out of it. */
STATS_INC_FREEMISS(cachep);
cache_flusharray(cachep, ac);-----------(1)
}
/* Reached on both branches: store the object in the array cache. */
ac_put_obj(cachep, ac, objp);-----------------(2)
}
(1)當本地對象緩衝池中的free object大於等於本地對象緩衝池free object個數limit時會調用cache_flusharray()釋放slab對象
(2)無論是否經過(1)中的cache_flusharray(),最後都會調用ac_put_obj()將slab對象放入本地對象緩衝池的entry數組中:ac->entry[ac->avail++] = objp;完成釋放過程。
下面看看cache_flusharray():
/*
 * Make room in the array cache @ac by removing its first ac->batchcount
 * entries (fewer when the shared array has less room than that).
 *
 * With n->list_lock held, the entries are either copied into the node's
 * shared array_cache (when one exists and is not full) or passed to
 * free_block(). After unlocking, the local `list` that free_block() may
 * have added pages to is handed to slabs_destroy(), and the entries
 * remaining in @ac are shifted down to index 0.
 *
 * NOTE(review): check_irq_off() suggests the caller must have interrupts
 * disabled - confirm.
 */
static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
{
int batchcount;
struct kmem_cache_node *n;
int node = numa_mem_id();
/* Local list head; free_block() receives it as its `list` argument. */
LIST_HEAD(list);
batchcount = ac->batchcount;
check_irq_off();
n = get_node(cachep, node);
spin_lock(&n->list_lock);
if (n->shared) {
struct array_cache *shared_array = n->shared;
/* Number of free slots left in the shared array. */
int max = shared_array->limit - shared_array->avail;
if (max) {
/* Never move more entries than the shared array can still hold. */
if (batchcount > max)
batchcount = max;
/* Append the first batchcount pointers of ac->entry to the shared array. */
memcpy(&(shared_array->entry[shared_array->avail]),
ac->entry, sizeof(void *) * batchcount);----------(1)
shared_array->avail += batchcount;
goto free_done;
}
}
/* No shared array, or it has no room: give the objects to free_block(). */
free_block(cachep, ac->entry, batchcount, node, &list);----------(2)
free_done:
spin_unlock(&n->list_lock);
/* Called only after the node lock has been dropped. */
slabs_destroy(cachep, &list);
ac->avail -= batchcount;
/* Slide the surviving entries to the front; the ranges are in the same array, hence memmove. */
memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);---------(3)
}
(1)如果存在共享對象緩衝池(n->shared,它是一個array_cache而不是鏈表)並且其中還有空位,則從CPU本地對象緩衝池中copy batchcount個slab對象到共享對象緩衝池中(batchcount最多爲共享對象緩衝池的剩餘空位數max)
(2)如果不存在共享對象緩衝池,或者共享對象緩衝池中的空閒對象個數已經等於limit閾值(沒有空位),則會調用free_block來釋放對象
(3)將本地對象緩衝池中剩餘的對象移到數組的頭部。
free_block():
/*
 * Return @nr_objects objects, whose pointers are stored in @objpp, to
 * their slabs on @node.
 *
 * For every object the owning slab page is unlinked from the list it is
 * on, the object is put back with slab_put_obj(), and the page is
 * relinked according to page->active:
 *  - active == 0 and n->free_objects > n->free_limit: the page goes onto
 *    the caller-supplied @list and cachep->num is subtracted from
 *    n->free_objects;
 *  - active == 0 otherwise: head of n->slabs_free;
 *  - active != 0: tail of n->slabs_partial.
 *
 * cache_flusharray() calls this with n->list_lock held and afterwards
 * passes @list to slabs_destroy().
 */
static void free_block(struct kmem_cache *cachep, void **objpp,
int nr_objects, int node, struct list_head *list)
{
int i;
struct kmem_cache_node *n = get_node(cachep, node);
for (i = 0; i < nr_objects; i++) {
void *objp;
struct page *page;
/* Operates on the array slot itself, before the pointer is read back. */
clear_obj_pfmemalloc(&objpp[i]);
objp = objpp[i];
page = virt_to_head_page(objp);---------------(1)
/* Unlink the page for now; it is re-added to the proper list below. */
list_del(&page->lru);--------------(2)
check_spinlock_acquired_node(cachep, node);
slab_put_obj(cachep, page, objp, node);----------------(3)
STATS_DEC_ACTIVE(cachep);
n->free_objects++;---------------(4)
/* fixup slab chains */
if (page->active == 0) {------------(5)
if (n->free_objects > n->free_limit) {
/* Over the node's free limit: stop counting this slab's objects and queue the page on the caller's list. */
n->free_objects -= cachep->num;
list_add_tail(&page->lru, list);
} else {
list_add(&page->lru, &n->slabs_free);-----------(6)
}
} else {
/* Unconditionally move a slab to the end of the
* partial list on free - maximum time for the
* other objects to be freed, too.
*/
list_add_tail(&page->lru, &n->slabs_partial);-----------(7)
}
}
}
(1)從slab對象獲取slab對象緩衝池的首個page結構體
(2)先暫時將page從原slab list裏面刪除
(3)釋放一個slab對象:先將page->active減1(釋放掉一個object,活躍數就得減1),再執行((freelist_idx_t *)(page->freelist))[idx] = val;,即以減1之後的page->active作爲下標,將page->freelist數組的對應元素設置爲該slab對象的index。這裏可以看出slab對象緩衝池的首個page->freelist數組存放的是所有空閒slab對象的索引值
(4)slab node的free object數加1
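(5)當active爲0時,說明此時slab對象緩衝池中全部爲free object,如果free_objects總值大於free_limit,則先從free_objects中減去一個slab所包含的對象數(cachep->num),再將整個slab對象緩衝池添加到調用者傳入的臨時鏈表list中(在cache_flusharray()裏就是用LIST_HEAD(list)定義的局部鏈表),待釋放n->list_lock之後,由slabs_destroy(cachep, &list)統一處理(銷燬)這些slab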
(6)如果free_objects總值不大於free_limit,則直接將slab對象緩衝池添加到slabs_free鏈表中
(7)如果page->active不爲0,說明此時slab對象緩衝池中存放有非free的slab object,則將此slab對象緩衝池添加到slabs_partial鏈表的尾部,再開始新的for循環,直到完全釋放完成batchcount個slab對象。
到此slab的釋放講完,最後講一下幾個獲取和釋放slab對象的函數:
/*
 * Take one object pointer out of the array cache @ac.
 *
 * When sk_memalloc_socks() reports true the work is delegated to
 * __ac_get_obj(), which also receives @flags and @force_refill.
 * Otherwise ac->avail is decremented and the entry at the new avail
 * index (i.e. the last stored one) is returned.
 */
static inline void *ac_get_obj(struct kmem_cache *cachep,
			struct array_cache *ac, gfp_t flags, bool force_refill)
{
	if (unlikely(sk_memalloc_socks()))
		return __ac_get_obj(cachep, ac, flags, force_refill);

	ac->avail--;
	return ac->entry[ac->avail];
}
ac_get_obj()函數從本地對象緩衝池的entry數組free元素的末尾獲取一個slab object,同時avail減1,表示CPU本地對象緩衝池少了一個free slab object。
/*
 * Store @objp in the array cache @ac.
 *
 * When sk_memalloc_socks() reports true, the pointer is first replaced by
 * the return value of __ac_put_obj(). The pointer is then written at index
 * ac->avail and avail is incremented. No bounds check is done here.
 */
static inline void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
								void *objp)
{
	if (unlikely(sk_memalloc_socks()))
		objp = __ac_put_obj(cachep, ac, objp);

	ac->entry[ac->avail] = objp;
	ac->avail++;
}
ac_put_obj()函數是CPU本地對象緩衝池的entry數組獲取一個來自share緩衝池或者slab對象緩衝池的slab對象,同時avail加1,表示CPU本地對象緩衝池多了一個free slab object。
/*
 * Take one object out of the slab described by @page.
 *
 * The freelist entry at position page->active supplies the object index,
 * index_to_obj() turns that index into an address, and page->active is
 * then incremented. @nodeid is only used by the DEBUG sanity check.
 */
static void *slab_get_obj(struct kmem_cache *cachep, struct page *page,
				int nodeid)
{
	unsigned int next_free = get_free_obj(page, page->active);
	void *obj = index_to_obj(cachep, page, next_free);

	page->active++;
#if DEBUG
	WARN_ON(page_to_nid(virt_to_page(obj)) != nodeid);
#endif

	return obj;
}
slab_get_obj()函數表示從slab對象緩衝池中獲取一個slab對象,給分配slab對象的user使用,此時page->active加1。
/*
 * Read entry @idx of page->freelist, viewed as an array of freelist_idx_t.
 * slab_get_obj() passes page->active as @idx.
 */
static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
{
	freelist_idx_t *index_table = (freelist_idx_t *)page->freelist;

	return index_table[idx];
}
/*
 * Translate an object index into an object address:
 * page->s_mem + cache->size * idx, so index 0 maps to page->s_mem itself.
 */
static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
				 unsigned int idx)
{
	void *obj = page->s_mem + cache->size * idx;

	return obj;
}
最後得到slab object的地址。
/*
 * Give @objp back to the slab described by @page.
 *
 * The object's address is converted to its index, page->active is
 * decremented, and the index is recorded in the freelist at the position
 * given by the decremented page->active. @nodeid is not used in this body.
 */
static void slab_put_obj(struct kmem_cache *cachep, struct page *page,
				void *objp, int nodeid)
{
	unsigned int freed_idx = obj_to_index(cachep, page, objp);

	/* Decrement first: the freelist slot written is the new active value. */
	page->active -= 1;
	set_free_obj(page, page->active, freed_idx);
}
slab_put_obj()將想要釋放的slab object地址轉化爲索引,先將page->active減1,然後把該索引賦值到page->freelist[page->active](使用減1之後的page->active作爲下標),即標記此slab object爲free object。
由以上可以看出,凡是從slab對象緩衝池中獲取或者歸還slab object,則slab對象緩衝池的首個page->active會發生加減1變化。
凡是從CPU本地對象緩衝池中獲取或者歸還slab object,此時ac->avail會發生加減1變化。