前面已经讲了slab描述符的初始化以及slab对象的创建,下面是slab对象的回收:
slab对象的回收通过kmem_cache_free来完成:
/*
 * kmem_cache_free - release an object back to the cache it came from.
 * @cachep: cache the caller believes the object belongs to
 * @objp: object to free
 *
 * Resolves the cache that really owns @objp first (guards against a
 * free into the wrong cache), then performs the free with local
 * interrupts disabled, since the per-CPU array_cache is touched.
 */
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
{
unsigned long flags;
/* validate/translate: returns NULL if objp does not belong here */
cachep = cache_from_obj(cachep, objp);
if (!cachep)
return;
/* per-CPU caches may only be manipulated with IRQs off */
local_irq_save(flags);
debug_check_no_locks_freed(objp, cachep->object_size);
if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(objp, cachep->object_size);
__cache_free(cachep, objp, _RET_IP_);
local_irq_restore(flags);
trace_kmem_cache_free(_RET_IP_, objp);
}
kmem_cache_free()->__cache_free()
/*
 * __cache_free - core of the free path; IRQs must already be off.
 * @cachep: owning cache
 * @objp: object being freed
 * @caller: return address for debugging/tracing
 *
 * Fast path: if the per-CPU array_cache has room, the object simply
 * goes back into ac->entry[]. Slow path: when the cache is full,
 * cache_flusharray() drains a batch out of it first.
 */
static inline void __cache_free(struct kmem_cache *cachep, void *objp,
unsigned long caller)
{
struct array_cache *ac = cpu_cache_get(cachep);
check_irq_off();
kmemleak_free_recursive(objp, cachep->flags);
objp = cache_free_debugcheck(cachep, objp, caller);
kmemcheck_slab_free(cachep, objp, cachep->object_size);
/*
 * Skip calling cache_free_alien() when the platform is not numa.
 * This will avoid cache misses that happen while accessing slabp (which
 * is per page memory reference) to get nodeid. Instead use a global
 * variable to skip the call, which is mostly likely to be present in
 * the cache.
 */
if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
return;
/* room left in the per-CPU cache? then this free is a "hit" */
if (ac->avail < ac->limit) {
STATS_INC_FREEHIT(cachep);
} else {
STATS_INC_FREEMISS(cachep);
/* per-CPU cache full: push a batch to shared pool / slab lists */
cache_flusharray(cachep, ac);-----------(1)
}
/* finally stash the object in the (now non-full) per-CPU cache */
ac_put_obj(cachep, ac, objp);-----------------(2)
}
(1)当本地对象缓冲池中的free object个数(ac->avail)已达到limit阈值(即ac->avail >= ac->limit)时,会先调用cache_flusharray()批量腾出空间;
(2)随后无论走哪条路径,都通过ac_put_obj()将slab对象放回本地对象缓冲池entry中:ac->entry[ac->avail++] = objp;完成释放过程。
下面看看cache_flusharray():
/*
 * cache_flusharray - drain a batch of objects out of a full per-CPU cache.
 * @cachep: owning cache
 * @ac: the per-CPU array_cache to drain (ac->avail == ac->limit here)
 *
 * Preferred destination is the per-node shared array_cache; if that is
 * absent or full, the objects are returned to their slabs via
 * free_block(), and any fully-free slabs collected on @list are then
 * destroyed outside the node lock.
 */
static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
{
int batchcount;
struct kmem_cache_node *n;
int node = numa_mem_id();
/* collects slabs that free_block() decides to tear down entirely */
LIST_HEAD(list);
batchcount = ac->batchcount;
check_irq_off();
n = get_node(cachep, node);
spin_lock(&n->list_lock);
if (n->shared) {
struct array_cache *shared_array = n->shared;
/* free room left in the shared pool */
int max = shared_array->limit - shared_array->avail;
if (max) {
if (batchcount > max)
batchcount = max;
/* bulk-move up to batchcount pointers from the head of ac->entry */
memcpy(&(shared_array->entry[shared_array->avail]),
ac->entry, sizeof(void *) * batchcount);----------(1)
shared_array->avail += batchcount;
goto free_done;
}
}
/* no shared pool (or it is full): give objects back to their slabs */
free_block(cachep, ac->entry, batchcount, node, &list);----------(2)
free_done:
spin_unlock(&n->list_lock);
/* destroy fully-free slabs gathered above, without holding the lock */
slabs_destroy(cachep, &list);
ac->avail -= batchcount;
/* compact: slide the surviving pointers to the front of entry[] */
memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);---------(3)
}
(1)如果存在shared共享对象缓冲池且其中尚有空位,则从CPU本地对象缓冲池中copy batchcount个slab对象到shared缓冲池中(batchcount会被截断为不超过剩余空位数);
(2)如果不存在shared共享对象缓冲池,或者其中空闲对象个数已达到limit阈值(没有空位),则调用free_block()把这批对象归还到各自的slab中;
(3)将本地对象缓冲池中剩余的对象移到entry数组的头部。
free_block():
/*
 * free_block - return nr_objects objects to their owning slabs.
 * @cachep: owning cache
 * @objpp: array of object pointers to release
 * @nr_objects: how many entries of @objpp to process
 * @node: NUMA node whose kmem_cache_node lists are updated
 * @list: out-list collecting slabs to be destroyed by the caller
 *
 * Caller holds n->list_lock. For each object the slab page is moved to
 * the appropriate node list (partial/free), or queued on @list for
 * destruction when the node already holds enough free objects.
 */
static void free_block(struct kmem_cache *cachep, void **objpp,
int nr_objects, int node, struct list_head *list)
{
int i;
struct kmem_cache_node *n = get_node(cachep, node);
for (i = 0; i < nr_objects; i++) {
void *objp;
struct page *page;
clear_obj_pfmemalloc(&objpp[i]);
objp = objpp[i];
/* head page of the slab this object lives in */
page = virt_to_head_page(objp);---------------(1)
/* unlink from whatever slab list it is currently on */
list_del(&page->lru);--------------(2)
check_spinlock_acquired_node(cachep, node);
/* record the object's index on the slab's freelist; active-- */
slab_put_obj(cachep, page, objp, node);----------------(3)
STATS_DEC_ACTIVE(cachep);
n->free_objects++;---------------(4)
/* fixup slab chains */
if (page->active == 0) {------------(5)
/* slab is now entirely free */
if (n->free_objects > n->free_limit) {
/* node has plenty already: queue slab for destruction */
n->free_objects -= cachep->num;
list_add_tail(&page->lru, list);
} else {
list_add(&page->lru, &n->slabs_free);-----------(6)
}
} else {
/* Unconditionally move a slab to the end of the
 * partial list on free - maximum time for the
 * other objects to be freed, too.
 */
list_add_tail(&page->lru, &n->slabs_partial);-----------(7)
}
}
}
(1)从slab对象获取slab对象缓冲池的首个page结构体
(2)先暂时将page从原slab list里面删除
(3)释放一个slab对象((freelist_idx_t *)(page->freelist))[idx] = val;,即先将active减1(释放掉一个object,活跃数就得减1),再把slab对象对应的index写入page->freelist数组中下标为page->active的位置,这里可以看出slab对象缓冲池的首个page->freelist数组存放的是所有空闲slab对象的索引值
(4)slab node的free object数加1
(5)当active为0时,说明此时slab对象缓冲池中全部为free object,如果free_objects总值大于free_limit,则将整个slab对象缓冲池挂到调用者传入的临时链表list上——该链表随后由cache_flusharray()调用slabs_destroy()遍历,把这些完全空闲的slab页面销毁并归还给伙伴系统
(6)如果free_object总值小于limit,则直接将slab对象缓冲池添加到slabs_free链表中
(7)如果page->active不为0,说明此时slab对象缓冲池中存放有非free的slab object,则将此slab对象缓冲池添加到slabs_partial链表中,再开始新的for循环,直到完全释放完成batchcount个slab对象。
到此slab的释放讲完,最后讲一下几个获取和释放slab对象的函数:
/*
 * ac_get_obj - pop one free object off the per-CPU array_cache.
 * @cachep: owning cache
 * @ac: per-CPU array_cache to take from (caller ensures ac->avail > 0)
 * @flags: allocation flags
 * @force_refill: passed through to the pfmemalloc-aware slow path
 *
 * Fast path is a plain LIFO pop from ac->entry[]; the __ac_get_obj()
 * variant additionally filters pfmemalloc-reserved objects when
 * memalloc sockets are in use.
 */
static inline void *ac_get_obj(struct kmem_cache *cachep,
struct array_cache *ac, gfp_t flags, bool force_refill)
{
void *objp;
if (unlikely(sk_memalloc_socks()))
objp = __ac_get_obj(cachep, ac, flags, force_refill);
else
/* LIFO: take the most recently freed object */
objp = ac->entry[--ac->avail];
return objp;
}
ac_get_obj()函数从本地对象缓冲池的entry数组free元素的末尾获取一个slab object,同时avail减1,表示CPU本地对象缓冲池少了一个free slab object。
/*
 * ac_put_obj - push one object onto the per-CPU array_cache.
 * @cachep: owning cache
 * @ac: per-CPU array_cache to store into (caller ensures room exists)
 * @objp: object being returned
 *
 * __ac_put_obj() may tag/replace the pointer for pfmemalloc tracking
 * when memalloc sockets are active; either way the object lands at
 * ac->entry[ac->avail] and avail is incremented.
 */
static inline void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
void *objp)
{
if (unlikely(sk_memalloc_socks()))
objp = __ac_put_obj(cachep, ac, objp);
ac->entry[ac->avail++] = objp;
}
ac_put_obj()函数将一个来自shared缓冲池或者slab对象缓冲池的slab对象放入CPU本地对象缓冲池的entry数组中,同时avail加1,表示CPU本地对象缓冲池多了一个free slab object。
/*
 * slab_get_obj - carve the next free object out of a slab page.
 * @cachep: owning cache
 * @page: head page of the slab
 * @nodeid: expected NUMA node (checked only under DEBUG)
 *
 * page->freelist[page->active] holds the index of the next free
 * object; translate it to an address and bump the active count.
 */
static void *slab_get_obj(struct kmem_cache *cachep, struct page *page,
int nodeid)
{
void *objp;
objp = index_to_obj(cachep, page, get_free_obj(page, page->active));
page->active++;
#if DEBUG
WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
#endif
return objp;
}
slab_get_obj()函数表示从slab对象缓冲池中获取一个slab对象,给分配slab对象的user使用,此时page->active加1。
/* Return the index of a free object recorded in the slab's freelist
 * array; idx is normally page->active, i.e. the first unused slot. */
static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
{
return ((freelist_idx_t *)page->freelist)[idx];
}
/* Translate an object index into its address:
 * base address of the first object (page->s_mem) plus idx * object size. */
static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
unsigned int idx)
{
return page->s_mem + cache->size * idx;
}
最后得到slab object的地址。
/*
 * slab_put_obj - mark one object of a slab page as free again.
 * @cachep: owning cache
 * @page: head page of the slab
 * @objp: object being released
 * @nodeid: NUMA node (unused here in this excerpt)
 *
 * Converts the address back to its index, decrements active, and
 * records the index at freelist[page->active] — the mirror image of
 * slab_get_obj().
 */
static void slab_put_obj(struct kmem_cache *cachep, struct page *page,
void *objp, int nodeid)
{
unsigned int objnr = obj_to_index(cachep, page, objp);
page->active--;
set_free_obj(page, page->active, objnr);
}
slab_put_obj()将想要释放的slab object地址转化为索引,先将page->active减1,再把该索引赋值到page->freelist[page->active],即标记此slab object为free object。
由以上可以看出,凡是从slab对象缓冲池中获取或者归还slab object,则slab对象缓冲池的首个page->active会发生加减1变化。
凡是从CPU本地对象缓冲池中获取或者归还slab object,此时ac->avail会发生加减1变化。