kernel-4.4 slab (Part 2)

Allocating slab objects:

Slab objects are allocated with kmem_cache_alloc():

void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	void *ret = slab_alloc(cachep, flags, _RET_IP_);

	trace_kmem_cache_alloc(_RET_IP_, ret,
			       cachep->object_size, cachep->size, flags);

	return ret;
}

The real work is done along the call chain slab_alloc() -> __do_cache_alloc() -> ____cache_alloc():

static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	void *objp;
	struct array_cache *ac;
	bool force_refill = false;

	check_irq_off();

	ac = cpu_cache_get(cachep);---------------(1)
	if (likely(ac->avail)) {------------------(2)
		ac->touched = 1;
		objp = ac_get_obj(cachep, ac, flags, false);------------(3)

		/*
		 * Allow for the possibility all avail objects are not allowed
		 * by the current flags
		 */
		if (objp) {
			STATS_INC_ALLOCHIT(cachep);
			goto out;
		}
		force_refill = true;----------------(4)
	}

	STATS_INC_ALLOCMISS(cachep);
	objp = cache_alloc_refill(cachep, flags, force_refill);--------------(5)
	/*
	 * the 'ac' may be updated by cache_alloc_refill(),
	 * and kmemleak_erase() requires its correct value.
	 */
	ac = cpu_cache_get(cachep);

out:
	/*
	 * To avoid a false negative, if an object that is in one of the
	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
	 * treat the array pointers as a reference to the object.
	 */
	if (objp)
		kmemleak_erase(&ac->entry[ac->avail]);
	return objp;
}

(1) Get the per-CPU array_cache. It was fully set up when the slab descriptor was initialized earlier, so this lookup is guaranteed to succeed.

(2) If ac->avail is non-zero, the current CPU's local object pool still holds free objects, so ac_get_obj() is called to grab one.

(3) Fetching a slab object:

static inline void *ac_get_obj(struct kmem_cache *cachep,
			struct array_cache *ac, gfp_t flags, bool force_refill)
{
	void *objp;

	if (unlikely(sk_memalloc_socks()))
		objp = __ac_get_obj(cachep, ac, flags, force_refill);
	else
		objp = ac->entry[--ac->avail];// simply pop the last member of this array_cache's entry array

	return objp;
}
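
For symmetry, here is the counterpart ac_put_obj(), which cache_alloc_refill() uses below at annotation (8). Quoting roughly from the 4.4 mm/slab.c sources, it is the mirror image of ac_get_obj(), pushing an object onto the top of the same LIFO entry stack:

static inline void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
								void *objp)
{
	if (unlikely(sk_memalloc_socks()))
		objp = __ac_put_obj(cachep, ac, objp);

	ac->entry[ac->avail++] = objp;// push onto the top of the entry stack
}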

(4) If ac_get_obj() failed even though avail was non-zero (per the comment in the code, all of the available objects may be ones the current flags are not allowed to use), set force_refill so that cache_alloc_refill() will forcibly grow the cache.

(5) If the local array_cache holds no free objects at all, fall through to the core function cache_alloc_refill() to obtain a slab object:

static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
							bool force_refill)
{
	int batchcount;
	struct kmem_cache_node *n;
	struct array_cache *ac;
	int node;

	check_irq_off();
	node = numa_mem_id();
	if (unlikely(force_refill))
		goto force_grow;
retry:
	ac = cpu_cache_get(cachep);--------------(1)
	batchcount = ac->batchcount;
	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
		/*
		 * If there was little recent activity on this cache, then
		 * perform only a partial refill.  Otherwise we could generate
		 * refill bouncing.
		 */
		batchcount = BATCHREFILL_LIMIT;
	}
	n = get_node(cachep, node);--------------(2)

	BUG_ON(ac->avail > 0 || !n);
	spin_lock(&n->list_lock);

	/* See if we can refill from the shared array */
	if (n->shared && transfer_objects(ac, n->shared, batchcount)) {---------(3)
		n->shared->touched = 1;
		goto alloc_done;
	}

	while (batchcount > 0) {
		struct list_head *entry;
		struct page *page;
		/* Get slab alloc is to come from. */
		entry = n->slabs_partial.next;----------------(4)
		if (entry == &n->slabs_partial) {
			n->free_touched = 1;
			entry = n->slabs_free.next;
			if (entry == &n->slabs_free)
				goto must_grow;
		}

		page = list_entry(entry, struct page, lru);--------------(5)
		check_spinlock_acquired(cachep);

		/*
		 * The slab was either on partial or free list so
		 * there must be at least one object available for
		 * allocation.
		 */
		BUG_ON(page->active >= cachep->num);------------(6)

		while (page->active < cachep->num && batchcount--) {------------(7)
			STATS_INC_ALLOCED(cachep);
			STATS_INC_ACTIVE(cachep);
			STATS_SET_HIGH(cachep);

			ac_put_obj(cachep, ac, slab_get_obj(cachep, page,
									node));---------------(8)
		}

		/* move slabp to correct slabp list: */
		list_del(&page->lru);-------------------------(9)
		if (page->active == cachep->num)-----------------(10)
			list_add(&page->lru, &n->slabs_full);
		else
			list_add(&page->lru, &n->slabs_partial);
	}

must_grow:
	n->free_objects -= ac->avail;
alloc_done:
	spin_unlock(&n->list_lock);

	if (unlikely(!ac->avail)) {
		int x;
force_grow:
		x = cache_grow(cachep, gfp_exact_node(flags), node, NULL);--------------(11)

		/* cache_grow can reenable interrupts, then ac could change. */
		ac = cpu_cache_get(cachep);
		node = numa_mem_id();

		/* no objects in sight? abort */
		if (!x && (ac->avail == 0 || force_refill))
			return NULL;

		if (!ac->avail)		/* objects refilled by interrupt? */-------------(12)
			goto retry;
	}
	ac->touched = 1;

	return ac_get_obj(cachep, ac, flags, force_refill);----------------(13)
}

(1) Get this CPU's local object pool descriptor (array_cache).

(2) Get the slab node descriptor (kmem_cache_node), which was initialized earlier.

(3) If the node's shared object pool has been set up and transfer_objects() succeeds in moving up to batchcount objects from it into the local pool, jump to alloc_done.

(4) If refilling from the shared pool failed, enter the while loop and check the node's slabs_partial and slabs_free lists in turn; if both are empty, jump to must_grow.

(5) If slabs_partial or slabs_free is not empty, use list_entry() to convert the lru list node back into its containing page structure.

(6) The slab came from the partial or free list, so page->active must still be below cachep->num (the maximum number of objects per slab); if it is not, BUG.

(7) The key step: the while loop pulls objects out of the slab whose head page is page, up to batchcount of them, and pushes them into the CPU's local object pool.

(8) slab_get_obj() takes one object out of the slab, incrementing page->active, and ac_put_obj() pushes it into the CPU-local pool. If active reaches num while batchcount is still > 0, the outer loop moves on to the next partial or free slab, until batchcount objects have been migrated into the local pool. (The helpers involved are quoted after this list.)

(9) Remove page from the list it was on.

(10) If page->active now equals cachep->num, move the page to the slabs_full list, otherwise back to slabs_partial.

(11) If both the partial and free lists are empty, there are no free slab objects left, so a new slab object pool must be created via cache_grow().

(12) cache_grow() only allocates a new slab object pool and adds it to the slabs_free list; ac->avail is still 0 at that point, so we goto retry. The retry pass then moves batchcount objects into the CPU-local pool, after which avail is non-zero.

(13) Pop one slab object from the CPU-local object pool, completing the allocation.
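
For step (8), the helpers are small. Quoting roughly from 4.4 mm/slab.c (debug-only code omitted): get_free_obj() reads the index of the next free object from the freelist array stored at page->freelist, and index_to_obj() converts that index into an object address relative to page->s_mem:

static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
{
	return ((freelist_idx_t *)page->freelist)[idx];
}

static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
				 unsigned int idx)
{
	return page->s_mem + cache->size * idx;
}

static void *slab_get_obj(struct kmem_cache *cachep, struct page *page,
				int nodeid)
{
	void *objp;

	/* the entry at index page->active in the freelist array names the next free object */
	objp = index_to_obj(cachep, page, get_free_obj(page, page->active));
	page->active++;

	return objp;
}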

Now let us look at the cache_grow() function:

static int cache_grow(struct kmem_cache *cachep,
		gfp_t flags, int nodeid, struct page *page)
{
	void *freelist;
	size_t offset;
	gfp_t local_flags;
	struct kmem_cache_node *n;

	/*
	 * Be lazy and only check for valid flags here,  keeping it out of the
	 * critical path in kmem_cache_alloc().
	 */
	if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
		pr_emerg("gfp: %u\n", flags & GFP_SLAB_BUG_MASK);
		BUG();
	}
	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

	/* Take the node list lock to change the colour_next on this node */
	check_irq_off();
	n = get_node(cachep, nodeid);
	spin_lock(&n->list_lock);

	/* Get colour for the slab, and cal the next value. */
	offset = n->colour_next;------------------(1)
	n->colour_next++;
	if (n->colour_next >= cachep->colour)
		n->colour_next = 0;
	spin_unlock(&n->list_lock);

	offset *= cachep->colour_off;

	if (gfpflags_allow_blocking(local_flags))
		local_irq_enable();

	/*
	 * The test for missing atomic flag is performed here, rather than
	 * the more obvious place, simply to reduce the critical path length
	 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
	 * will eventually be caught here (where it matters).
	 */
	kmem_flagcheck(cachep, flags);

	/*
	 * Get mem for the objs.  Attempt to allocate a physical page from
	 * 'nodeid'.
	 */
	if (!page)
		page = kmem_getpages(cachep, local_flags, nodeid);-----------(2)
	if (!page)
		goto failed;

	/* Get slab management. */
	freelist = alloc_slabmgmt(cachep, page, offset,
			local_flags & ~GFP_CONSTRAINT_MASK, nodeid);---------(3)
	if (!freelist)
		goto opps1;

	slab_map_pages(cachep, page, freelist);---------------(4)

	cache_init_objs(cachep, page);----------------(5)

	if (gfpflags_allow_blocking(local_flags))
		local_irq_disable();
	check_irq_off();
	spin_lock(&n->list_lock);

	/* Make slab active. */
	list_add_tail(&page->lru, &(n->slabs_free));---------------(6)
	STATS_INC_GROWN(cachep);
	n->free_objects += cachep->num;-----------------------(7)
	spin_unlock(&n->list_lock);
	return 1;
opps1:
	kmem_freepages(cachep, page);
failed:
	if (gfpflags_allow_blocking(local_flags))
		local_irq_disable();
	return 0;
}

(1) colour_next is the colour index that the next slab created on this node will use. It starts at 0 and increases by 1 with each new slab until it reaches cachep->colour, the maximum for this cache, and then wraps back to 0. Multiplying it by cachep->colour_off turns it into the byte offset used for cache colouring.

(2) Allocate 2^gfporder physical pages for the slab object pool.

(3) alloc_slabmgmt() initializes freelist, page->active and page->s_mem: freelist is the virtual address of the slab's first page plus the colour offset; page->active is set to 0; and page->s_mem is the first page's virtual address plus the colour offset plus cachep->freelist_size. (See the snippet after this list.)

(4) slab_map_pages() sets page->slab_cache = cachep and page->freelist = freelist.

(5) cache_init_objs() marks every object's status as OBJECT_FREE and initializes the page->freelist[] index array.

(6) Add the newly created slab object pool to the slab node's slabs_free list.

(7) Add cachep->num to the slab node's total free_objects count.
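
To make steps (3) and (4) concrete, this is roughly what alloc_slabmgmt() and slab_map_pages() look like in 4.4 mm/slab.c (the OFF_SLAB branch, where the freelist lives in a separate cache, is included for completeness; colour_off is the offset computed from colour_next above):

static void *alloc_slabmgmt(struct kmem_cache *cachep,
				   struct page *page, int colour_off,
				   gfp_t local_flags, int nodeid)
{
	void *freelist;
	void *addr = page_address(page);

	if (OFF_SLAB(cachep)) {
		/* Slab management obj is off-slab. */
		freelist = kmem_cache_alloc_node(cachep->freelist_cache,
					      local_flags, nodeid);
		if (!freelist)
			return NULL;
	} else {
		/* on-slab: freelist sits right after the colour area */
		freelist = addr + colour_off;
		colour_off += cachep->freelist_size;
	}
	page->active = 0;
	page->s_mem = addr + colour_off;

	return freelist;
}

static void slab_map_pages(struct kmem_cache *cache, struct page *page,
			   void *freelist)
{
	page->slab_cache = cache;
	page->freelist = freelist;
}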

That is the whole flow of slab object allocation. With all of it in place, the overall slab framework can be drawn as a diagram:

The diagram takes one node of the global slab_caches list, kmalloc-64, as an example to show how the memory blocks of a slab node are laid out; other nodes follow the same pattern. Note that the global slab_caches list links together all slab descriptors, both the generic ones (such as kmalloc-xxx) and the dedicated ones (for example, named slabs created specifically by drivers).
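
To close, here is a minimal, hypothetical usage sketch (the my_obj and my_cache names are made up for illustration) showing how a driver would create one of those dedicated, named slab descriptors and allocate from it through the very path analyzed above:

#include <linux/module.h>
#include <linux/slab.h>

struct my_obj {
	int id;
	char name[32];
};

static struct kmem_cache *my_cache;

static int __init my_slab_init(void)
{
	struct my_obj *obj;

	/* creates a dedicated slab descriptor and links it on the global slab_caches list */
	my_cache = kmem_cache_create("my_obj_cache", sizeof(struct my_obj),
				     0, SLAB_HWCACHE_ALIGN, NULL);
	if (!my_cache)
		return -ENOMEM;

	/* takes the ____cache_alloc() fast path shown above when the per-CPU pool is warm */
	obj = kmem_cache_alloc(my_cache, GFP_KERNEL);
	if (!obj) {
		kmem_cache_destroy(my_cache);
		return -ENOMEM;
	}

	obj->id = 1;

	kmem_cache_free(my_cache, obj);	/* the object goes back into the per-CPU pool */
	return 0;
}

static void __exit my_slab_exit(void)
{
	kmem_cache_destroy(my_cache);
}

module_init(my_slab_init);
module_exit(my_slab_exit);
MODULE_LICENSE("GPL");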
