水平有限,描述不當之處還請指正,轉載請註明出處http://blog.csdn.net/vanbreaker/article/details/7666959
在前文中介紹了slab所涉及到的數據結構, slab分配器的初始化工作都是圍繞這些數據結構來展開的,主要是針對以下兩個問題:
1.創建kmem_cache高速緩存用來存儲所有的cache描述符
2.創建array_cache和kmem_list3高速緩存用來存儲slab數據結構中的這兩個關鍵結構
這裏明顯有點自相矛盾,那就是slab管理器尚未建立起來,又如何靠slab分配高速緩存來給這些結構分配空間呢?
解決第一個問題的方法是直接靜態定義一個名爲cache_cache的kmem_cache結構,來管理所有的kmem_cache描述符,對於array_cache和kmem_list3,內核也是先靜態定義,然後建立起普通高速緩存(general cache),再使用kmalloc分配普通高速緩存空間來替代之前靜態定義的部分。
普通高速緩存是一組大小按幾何倍數增長的高速緩存的合集,一個普通高速緩存用如下結構描述
/* Size description struct for general caches. */
struct cache_sizes {
size_t cs_size; /* object size in bytes served by this general cache */
struct kmem_cache *cs_cachep; /* cache descriptor for normal (non-DMA) allocations of this size */
#ifdef CONFIG_ZONE_DMA
struct kmem_cache *cs_dmacachep; /* cache descriptor for DMA-capable allocations of this size */
#endif
};
普通高速緩存的大小由malloc_sizes表來確定
/*
 * These are the default caches for kmalloc. Custom caches can have other sizes.
 *
 * X-macro construction: CACHE(x) is temporarily defined to emit one
 * array initializer per size, <linux/kmalloc_sizes.h> supplies the size
 * list, and the trailing CACHE(ULONG_MAX) entry acts as the
 * end-of-table sentinel (see the sizes->cs_size != ULONG_MAX loop in
 * kmem_cache_init()).
 */
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
CACHE(ULONG_MAX)
#undef CACHE
};
其中<linux/kmalloc_sizes.h>中的內容爲
/*
 * Contents of <linux/kmalloc_sizes.h>: the list of general cache sizes,
 * roughly a power-of-two series. CACHE(x) expands to whatever the
 * includer defined it as (here, a malloc_sizes[] initializer).
 * The 32-byte cache exists only on 4KB-page systems, the 96/192-byte
 * in-between sizes only when the L1 cache line is small enough, and the
 * largest sizes only when KMALLOC_MAX_SIZE permits them.
 */
#if (PAGE_SIZE == 4096)
CACHE(32)
#endif
CACHE(64)
#if L1_CACHE_BYTES < 64
CACHE(96)
#endif
CACHE(128)
#if L1_CACHE_BYTES < 128
CACHE(192)
#endif
CACHE(256)
CACHE(512)
CACHE(1024)
CACHE(2048)
CACHE(4096)
CACHE(8192)
CACHE(16384)
CACHE(32768)
CACHE(65536)
CACHE(131072)
#if KMALLOC_MAX_SIZE >= 262144
CACHE(262144)
#endif
#if KMALLOC_MAX_SIZE >= 524288
CACHE(524288)
#endif
#if KMALLOC_MAX_SIZE >= 1048576
CACHE(1048576)
#endif
#if KMALLOC_MAX_SIZE >= 2097152
CACHE(2097152)
#endif
#if KMALLOC_MAX_SIZE >= 4194304
CACHE(4194304)
#endif
#if KMALLOC_MAX_SIZE >= 8388608
CACHE(8388608)
#endif
#if KMALLOC_MAX_SIZE >= 16777216
CACHE(16777216)
#endif
#if KMALLOC_MAX_SIZE >= 33554432
CACHE(33554432)
#endif
cache_cache的初始化和普通高速緩存的建立由start_kernel()-->mm_init()-->kmem_cache_init()函數來完成,下面就來看具體的初始化代碼
/*
 * Bootstrap initialisation of the slab allocator, reached via
 * start_kernel() --> mm_init().  Builds cache_cache (the statically
 * allocated cache of all kmem_cache descriptors) and the general
 * kmalloc caches, then replaces the static bootstrap head arrays and
 * kmem_list3 structures with kmalloc-allocated ones.
 */
void __init kmem_cache_init(void)
{
size_t left_over;
struct cache_sizes *sizes;
struct cache_names *names;
int i;
int order;
int node;
if (num_possible_nodes() == 1)
use_alien_caches = 0;
/* Initialise the static bootstrap kmem_list3 array initkmem_list3 */
for (i = 0; i < NUM_INIT_LISTS; i++) {
kmem_list3_init(&initkmem_list3[i]);
if (i < MAX_NUMNODES)
cache_cache.nodelists[i] = NULL;
}
/* Associate cache_cache with the static initkmem_list3 entries */
set_up_list3s(&cache_cache, CACHE_CACHE);
/*
 * Fragmentation resistance on low memory - only use bigger
 * page orders on machines with more than 32MB of memory.
 */
if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
slab_break_gfp_order = BREAK_GFP_ORDER_HI;
/* Bootstrap is tricky, because several objects are allocated
 * from caches that do not exist yet:
 * 1) initialize the cache_cache cache: it contains the struct
 * kmem_cache structures of all caches, except cache_cache itself:
 * cache_cache is statically allocated.
 * Initially an __init data area is used for the head array and the
 * kmem_list3 structures, it's replaced with a kmalloc allocated
 * array at the end of the bootstrap.
 * 2) Create the first kmalloc cache.
 * The struct kmem_cache for the new cache is allocated normally.
 * An __init data area is used for the head array.
 * 3) Create the remaining kmalloc caches, with minimally sized
 * head arrays.
 * 4) Replace the __init data head arrays for cache_cache and the first
 * kmalloc cache with kmalloc allocated arrays.
 * 5) Replace the __init data for kmem_list3 for cache_cache and
 * the other cache's with kmalloc allocated memory.
 * 6) Resize the head arrays of the kmalloc caches to their final sizes.
 */
node = numa_node_id();
/* Initialise the remaining fields of cache_cache */
/* 1) create the cache_cache */
INIT_LIST_HEAD(&cache_chain);
list_add(&cache_cache.next, &cache_chain);
cache_cache.colour_off = cache_line_size();
cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
/*
 * struct kmem_cache size depends on nr_node_ids, which
 * can be less than MAX_NUMNODES.
 */
cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
nr_node_ids * sizeof(struct kmem_list3 *);
#if DEBUG
cache_cache.obj_size = cache_cache.buffer_size;
#endif
cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
cache_line_size());
cache_cache.reciprocal_buffer_size =
reciprocal_value(cache_cache.buffer_size);
/*
 * Compute cache_cache's leftover space and number of objects per slab;
 * order determines the slab size (PAGE_SIZE << order).
 */
for (order = 0; order < MAX_ORDER; order++) {
cache_estimate(order, cache_cache.buffer_size,
cache_line_size(), 0, &left_over, &cache_cache.num);
/* stop at the first order that yields a non-zero object count */
if (cache_cache.num)
break;
}
BUG_ON(!cache_cache.num);
cache_cache.gfporder = order;/* log2 of the number of pages per slab */
cache_cache.colour = left_over / cache_cache.colour_off;/* number of usable colours */
/* Size of the slab management area: struct slab plus the kmem_bufctl_t array */
cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
sizeof(struct slab), cache_line_size());
/* 2+3) create the kmalloc caches */
sizes = malloc_sizes;
names = cache_names;
/*
 * Initialize the caches that provide memory for the array cache and the
 * kmem_list3 structures first. Without this, further allocations will
 * bug.
 */
/*
 * General caches of the matching sizes must exist before kmalloc()
 * can be used below to create the per-CPU and kmem_list3 caches.
 */
sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
sizes[INDEX_AC].cs_size,
ARCH_KMALLOC_MINALIGN,
ARCH_KMALLOC_FLAGS|SLAB_PANIC,
NULL);
/*
 * If AC and L3 index different malloc_sizes entries (their sizes fall
 * in different classes), create a separate general cache for L3;
 * otherwise the two share one general cache.
 */
if (INDEX_AC != INDEX_L3) {
sizes[INDEX_L3].cs_cachep =
kmem_cache_create(names[INDEX_L3].name,
sizes[INDEX_L3].cs_size,
ARCH_KMALLOC_MINALIGN,
ARCH_KMALLOC_FLAGS|SLAB_PANIC,
NULL);
}
slab_early_init = 0;
/* Create the general caches of every remaining size class */
while (sizes->cs_size != ULONG_MAX) {
/*
 * For performance, all the general caches are L1 aligned.
 * This should be particularly beneficial on SMP boxes, as it
 * eliminates "false sharing".
 * Note for systems short on memory removing the alignment will
 * allow tighter packing of the smaller caches.
 */
if (!sizes->cs_cachep) {
sizes->cs_cachep = kmem_cache_create(names->name,
sizes->cs_size,
ARCH_KMALLOC_MINALIGN,
ARCH_KMALLOC_FLAGS|SLAB_PANIC,
NULL);
}
#ifdef CONFIG_ZONE_DMA
sizes->cs_dmacachep = kmem_cache_create(
names->name_dma,
sizes->cs_size,
ARCH_KMALLOC_MINALIGN,
ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
SLAB_PANIC,
NULL);
#endif
sizes++;
names++;
}
/* 4) Replace the bootstrap head arrays */
{
struct array_cache *ptr;
/* kmalloc() a per-CPU head array for cache_cache */
ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
/* copy the statically defined initarray_cache contents into it */
memcpy(ptr, cpu_cache_get(&cache_cache),
sizeof(struct arraycache_init));
/*
 * Do not assume that spinlocks can be initialized via memcpy:
 */
spin_lock_init(&ptr->lock);
/* point cache_cache at the dynamically allocated head array */
cache_cache.array[smp_processor_id()] = ptr;
/*
 * Likewise kmalloc() a per-CPU head array for the AC general cache,
 * replacing the static initarray_generic.cache.
 */
ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
!= &initarray_generic.cache);
memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
sizeof(struct arraycache_init));
/*
 * Do not assume that spinlocks can be initialized via memcpy:
 */
spin_lock_init(&ptr->lock);
malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
ptr;
}
/* 5) Replace the bootstrap kmem_list3's */
{
int nid;
for_each_online_node(nid) {
/* allocate cache space for cache_cache's kmem_list3, replacing the static initkmem_list3 */
init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
/* same for the AC general cache's kmem_list3 */
init_list(malloc_sizes[INDEX_AC].cs_cachep,
&initkmem_list3[SIZE_AC + nid], nid);
if (INDEX_AC != INDEX_L3) {
/* and for the L3 general cache's kmem_list3, when separate */
init_list(malloc_sizes[INDEX_L3].cs_cachep,
&initkmem_list3[SIZE_L3 + nid], nid);
}
}
}
/* bootstrap done; only head-array resizing remains (kmem_cache_init_late()) */
g_cpucache_up = EARLY;
}
- 前面大部分的代碼都是圍繞cache_cache展開的,主要是將cache_cache同靜態kmem_list3進行關聯,將cache_cache添加到cache_chain鏈表中,並且計算初始化內部的一些數據項
- 現在還沒有高速緩存來存儲cache_cache中的kmem_list3描述符和array_cache描述符,因此下面就要調用kmem_cache_create()建立高速緩存來存儲這兩種描述符
- 內核使用g_cpucache_up這個枚舉量來表示slab分配器的初始化進度
/*
 * Tracks how far slab bootstrap has progressed; advanced by
 * kmem_cache_create()-->setup_cpu_cache() during kmem_cache_init(),
 * and set to FULL by kmem_cache_init_late().
 */
static enum {
NONE, /* no general cache exists yet */
PARTIAL_AC, /* the array_cache-sized general cache has been created */
PARTIAL_L3, /* the kmem_list3-sized general cache has also been created */
EARLY, /* kmem_cache_init() bootstrap finished */
FULL /* kmem_cache_init_late() done: slab fully operational */
} g_cpucache_up;
這個值的更新是在kmem_cache_create()-->setup_cpu_cache()函數中進行更新的,每調用一次kmem_cache_create(),g_cpucache_up的值就加1,直到它等於EARLY,比如說第一次調用kmem_cache_create()創建了AC(array_cache)的高速緩存,那麼g_cpucache_up由NONE變爲PARTIAL_AC,那麼下次調用kmem_cache_create()創建L3高速緩存時,內核就知道AC高速緩存已經準備好了,也就是說可以在array_cache高速緩存中爲L3高速緩存描述符的array_cache描述符分配高速緩存空間了。
- 創建了AC和L3高速緩存後就循環創建各級普通高速緩存,此時創建的高速緩存都是完整的了!也就是說裏面的結構變量都已經是存儲在相應的高速緩存中
- 由於AC高速緩存已經創建,因此kmalloc()動態創建一個array_cache對象替換cache_cache的靜態array_cache
- 由於AC高速緩存描述符本身的array_cache描述符還未動態創建,因此同樣kmalloc()動態創建一個array_cache替換AC高速緩存的靜態array_cache
- 爲cache_cache,AC,L3高速緩存分別動態創建kmem_list描述符對象,替換靜態的initkmem_list3
- 將g_cpucache_up置爲EARLY,表示slab分配器的初始化已初步完成
slab分配器初始化工作的最後一步由kmem_cache_init_late()函數完成,這個函數就不做詳細分析了,它的工作就是設置cache_cache和各級普通高速緩存中的array_cache本地高速緩存的相關屬性
/*
 * Final phase of slab initialisation (step 6 of the bootstrap): resize
 * every cache's per-CPU head arrays to their final sizes, mark the
 * allocator FULL, and register lockdep/cpu-hotplug hooks.
 */
void __init kmem_cache_init_late(void)
{
struct kmem_cache *cachep;
/* 6) resize the head arrays to their final sizes */
mutex_lock(&cache_chain_mutex);
list_for_each_entry(cachep, &cache_chain, next)
if (enable_cpucache(cachep, GFP_NOWAIT))
BUG();
mutex_unlock(&cache_chain_mutex);
/* Done! */
g_cpucache_up = FULL; /* slab initialisation complete */
/* Annotate slab for lockdep -- annotate the malloc caches */
init_lock_keys();
/*
 * Register a cpu startup notifier callback that initializes
 * cpu_cache_get for all new cpus
 */
register_cpu_notifier(&cpucache_notifier);
/*
 * The reap timers are started later, with a module init call: That part
 * of the kernel is not yet operational.
 */
}