5.8.2 初始化slab分配器
回到mm_init()函數,繼續走,下一個函數kmem_cache_init(),也是重點函數,用於初始化內核slab分配體系。這個函數來自文件mm/slab.c
1375void __init kmem_cache_init(void) 1376{ 1377 size_t left_over; 1378 struct cache_sizes *sizes; 1379 struct cache_names *names; 1380 int i; 1381 int order; 1382 int node; 1383 1384 if (num_possible_nodes() == 1) 1385 use_alien_caches = 0; 1386 1387 for (i = 0; i < NUM_INIT_LISTS; i++) { 1388 kmem_list3_init(&initkmem_list3[i]); 1389 if (i < MAX_NUMNODES) 1390 cache_cache.nodelists[i] = NULL; 1391 } 1392 set_up_list3s(&cache_cache, CACHE_CACHE); 1393 …… 1398 if (totalram_pages > (32 << 20) >> PAGE_SHIFT) 1399 slab_break_gfp_order = BREAK_GFP_ORDER_HI; 1400 …… 1420 1421 node = numa_node_id(); 1422 1423 /* 1) create the cache_cache */ 1424 INIT_LIST_HEAD(&cache_chain); 1425 list_add(&cache_cache.next, &cache_chain); 1426 cache_cache.colour_off = cache_line_size(); 1427 cache_cache.array[smp_processor_id()] = &initarray_cache.cache; 1428 cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node]; 1429 …… 1434 cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) + 1435 nr_node_ids * sizeof(struct kmem_list3 *); 1436#if DEBUG 1437 cache_cache.obj_size = cache_cache.buffer_size; 1438#endif 1439 cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, 1440 cache_line_size()); 1441 cache_cache.reciprocal_buffer_size = 1442 reciprocal_value(cache_cache.buffer_size); 1443 1444 for (order = 0; order < MAX_ORDER; order++) { 1445 cache_estimate(order, cache_cache.buffer_size, 1446 cache_line_size(), 0, &left_over, &cache_cache.num); 1447 if (cache_cache.num) 1448 break; 1449 } 1450 BUG_ON(!cache_cache.num); 1451 cache_cache.gfporder = order; 1452 cache_cache.colour = left_over / cache_cache.colour_off; 1453 cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + 1454 sizeof(struct slab), cache_line_size()); 1455 1456 /* 2+3) create the kmalloc caches */ 1457 sizes = malloc_sizes; 1458 names = cache_names; 1459 …… 1466 sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, 1467 sizes[INDEX_AC].cs_size, 1468 ARCH_KMALLOC_MINALIGN, 
1469 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1470 NULL); 1471 1472 if (INDEX_AC != INDEX_L3) { 1473 sizes[INDEX_L3].cs_cachep = 1474 kmem_cache_create(names[INDEX_L3].name, 1475 sizes[INDEX_L3].cs_size, 1476 ARCH_KMALLOC_MINALIGN, 1477 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1478 NULL); 1479 } 1480 1481 slab_early_init = 0; 1482 1483 while (sizes->cs_size != ULONG_MAX) { …… 1491 if (!sizes->cs_cachep) { 1492 sizes->cs_cachep = kmem_cache_create(names->name, 1493 sizes->cs_size, 1494 ARCH_KMALLOC_MINALIGN, 1495 ARCH_KMALLOC_FLAGS|SLAB_PANIC, 1496 NULL); 1497 } 1498#ifdef CONFIG_ZONE_DMA 1499 sizes->cs_dmacachep = kmem_cache_create( 1500 names->name_dma, 1501 sizes->cs_size, 1502 ARCH_KMALLOC_MINALIGN, 1503 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| 1504 SLAB_PANIC, 1505 NULL); 1506#endif 1507 sizes++; 1508 names++; 1509 } 1510 /* 4) Replace the bootstrap head arrays */ 1511 { 1512 struct array_cache *ptr; 1513 1514 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); 1515 1516 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); 1517 memcpy(ptr, cpu_cache_get(&cache_cache), 1518 sizeof(struct arraycache_init)); 1519 /* 1520 * Do not assume that spinlocks can be initialized via memcpy: 1521 */ 1522 spin_lock_init(&ptr->lock); 1523 1524 cache_cache.array[smp_processor_id()] = ptr; 1525 1526 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); 1527 1528 BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) 1529 != &initarray_generic.cache); 1530 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), 1531 sizeof(struct arraycache_init)); 1532 /* 1533 * Do not assume that spinlocks can be initialized via memcpy: 1534 */ 1535 spin_lock_init(&ptr->lock); 1536 1537 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = 1538 ptr; 1539 } 1540 /* 5) Replace the bootstrap kmem_list3's */ 1541 { 1542 int nid; 1543 1544 for_each_online_node(nid) { 1545 init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid); 1546 1547 init_list(malloc_sizes[INDEX_AC].cs_cachep, 
1548 &initkmem_list3[SIZE_AC + nid], nid); 1549 1550 if (INDEX_AC != INDEX_L3) { 1551 init_list(malloc_sizes[INDEX_L3].cs_cachep, 1552 &initkmem_list3[SIZE_L3 + nid], nid); 1553 } 1554 } 1555 } 1556 1557 g_cpucache_up = EARLY; 1558} |
爲節省篇幅去掉了若干行代碼(主要是註釋,以及少量與主線無關的語句)。1387行,宏NUM_INIT_LISTS的值爲
/* Number of static bootstrap kmem_list3 structures: three per possible
 * NUMA node (one each for cache_cache, the SIZE_AC cache and the
 * SIZE_L3 cache), used before kmalloc itself works. */
#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
在非NUMA配置(MAX_NUMNODES爲1)下也就是3。執行NUM_INIT_LISTS次循環,調用kmem_list3_init函數初始化全局變量initkmem_list3[]數組。該數組的定義也在同一個文件:
/*
 * Per-node slab bookkeeping for one cache: the three slab lists plus
 * node-local tunables and caches.
 */
struct kmem_list3 {
	struct list_head slabs_partial;	/* partial list first, better asm code */
	struct list_head slabs_full;	/* slabs with every object allocated */
	struct list_head slabs_free;	/* slabs with no object allocated */
	unsigned long free_objects;	/* total free objects on this node */
	unsigned int free_limit;	/* NOTE(review): presumably the max free
					 * objects kept before reclaim — confirm
					 * against cache_reap() (not shown) */
	unsigned int colour_next;	/* Per-node cache coloring */
	spinlock_t list_lock;		/* protects the three lists above */
	struct array_cache *shared;	/* shared per node */
	struct array_cache **alien;	/* on other nodes */
	unsigned long next_reap;	/* updated without locking */
	int free_touched;		/* updated without locking */
};
/* Static bootstrap lists used before the slab allocator can allocate
 * kmem_list3 structures for itself; __initdata, so the memory is
 * released once boot-time initialisation is over. */
struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
而初始化它每個元素的函數也很簡單,位於同一個文件中:
/*
 * Reset one bootstrap kmem_list3 to a known-empty state: empty slab
 * lists, no shared/alien array caches, colouring restarted from 0,
 * counters zeroed and the list lock initialised.
 */
static void kmem_list3_init(struct kmem_list3 *parent)
{
	INIT_LIST_HEAD(&parent->slabs_full);
	INIT_LIST_HEAD(&parent->slabs_partial);
	INIT_LIST_HEAD(&parent->slabs_free);
	parent->shared = NULL;
	parent->alien = NULL;
	parent->colour_next = 0;
	spin_lock_init(&parent->list_lock);
	parent->free_objects = 0;
	parent->free_touched = 0;
}
隨後1392行,調用set_up_list3s函數爲全局變量cache_cache初始化它的lists字段。全局變量cache_cache太重要了,它是slab體系的核心數據結構,其定義如下:
/*
 * The "cache of caches": the kmem_cache that kmem_cache descriptors
 * themselves are allocated from. It must exist before any allocator
 * runs, hence the static initialisation.
 */
static struct kmem_cache cache_cache = {
	.batchcount = 1,
	.limit = BOOT_CPUCACHE_ENTRIES,
	.shared = 1,
	.buffer_size = sizeof(struct kmem_cache),
	.name = "kmem_cache",
};
struct kmem_cache { /* 1) per-cpu data, touched during every alloc/free */ struct array_cache *array[NR_CPUS]; /* 2) Cache tunables. Protected by cache_chain_mutex */ unsigned int batchcount; unsigned int limit; unsigned int shared;
unsigned int buffer_size; u32 reciprocal_buffer_size; /* 3) touched by every alloc & free from the backend */
unsigned int flags; /* constant flags */ unsigned int num; /* # of objs per slab */
/* 4) cache_grow/shrink */ /* order of pgs per slab (2^n) */ unsigned int gfporder;
/* force GFP flags, e.g. GFP_DMA */ gfp_t gfpflags;
size_t colour; /* cache colouring range */ unsigned int colour_off; /* colour offset */ struct kmem_cache *slabp_cache; unsigned int slab_size; unsigned int dflags; /* dynamic flags */
/* constructor func */ void (*ctor)(void *obj);
/* 5) cache creation/removal */ const char *name; struct list_head next;
/* 6) statistics */ #ifdef CONFIG_DEBUG_SLAB ……slab調試相關數據結構,省略。 #endif /* CONFIG_DEBUG_SLAB */ ……一些註釋…… struct kmem_list3 *nodelists[MAX_NUMNODES]; /* * Do not add fields after nodelists[] */ }; |
kmem_cache數據結構的註釋寫得很詳細,大家可以去仔細看看。set_up_list3s函數在本文件中:
/* Index of cache_cache's group of bootstrap lists in initkmem_list3[]. */
#define CACHE_CACHE 0

/*
 * Point cachep->nodelists[] at the static bootstrap lists, one per
 * online node, starting at initkmem_list3[index]. Each node's
 * next_reap deadline is staggered using the cache pointer modulo
 * REAPTIMEOUT_LIST3, so that different caches are not all reaped at
 * the same jiffy.
 */
static void __init set_up_list3s(struct kmem_cache *cachep, int index)
{
	int node;

	for_each_online_node(node) {
		cachep->nodelists[node] = &initkmem_list3[index + node];
		cachep->nodelists[node]->next_reap = jiffies +
		    REAPTIMEOUT_LIST3 +
		    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
	}
}
很簡單,就是遍歷每個在線NODE,把cache_cache的nodelists[node]指向剛纔初始化好的initkmem_list3[index + node],同時設置該節點的next_reap(下次slab回收時刻,以cachep指針取模作偏移,錯開各cache的回收時間)。在非NUMA體系下,效果就是nodelists[0]指向initkmem_list3[0];在NUMA體系下,每個在線NODE對應initkmem_list3[]中的一個元素,由cache_cache的nodelists[]相應元素指向。
繼續走,1424行,初始化一個內核全局鏈表cache_chain,這個東西就是個很簡單的list_head結構,定義在同一個文件中:
/* Global list linking every kmem_cache in the system; cache_cache is
 * inserted as its first element in kmem_cache_init(). */
static struct list_head cache_chain;
隨後調用list_add將它與cache_cache鏈接起來,接下來1426~1454行初始化這個cache_cache的其他字段
來到1457行,又一個重要的全局變量malloc_sizes。這個變量關係着通用slab分配器的初始化,有關專用/通用slab分配器的概念是Linux kernel內存管理的核心內容,對這個概念不熟悉的同學請重新學習一下Linux內核內存管理。來看這個變量的定義,在同一文件的570行:
570 struct cache_sizes malloc_sizes[] = { 571 #define CACHE(x) { .cs_size = (x) }, 572 #include <linux/kmalloc_sizes.h> 573 CACHE(ULONG_MAX) 574 #undef CACHE 575}; |
cache_sizes是個如下結構:
/*
 * One entry per general-purpose kmalloc size class: the object size
 * and the cache(s) that serve it.
 */
struct cache_sizes {
	size_t cs_size;			/* object size of this size class */
	struct kmem_cache *cs_cachep;	/* cache backing plain kmalloc() */
#ifdef CONFIG_ZONE_DMA
	struct kmem_cache *cs_dmacachep; /* cache backing kmalloc(.., GFP_DMA) */
#endif
};
那麼malloc_sizes[]數組的全部元素來自linux/kmalloc_sizes.h文件,下面就來看看這個文件的全部內容:
/*
 * <linux/kmalloc_sizes.h>: the table of general-purpose kmalloc size
 * classes. Each CACHE(x) expands according to whatever CACHE macro is
 * in effect at the point of #include — to { .cs_size = x } in
 * malloc_sizes[], and to the name strings in cache_names[].
 */
#if (PAGE_SIZE == 4096)
	CACHE(32)
#endif
	CACHE(64)
#if L1_CACHE_BYTES < 64
	CACHE(96)
#endif
	CACHE(128)
#if L1_CACHE_BYTES < 128
	CACHE(192)
#endif
	CACHE(256)
	CACHE(512)
	CACHE(1024)
	CACHE(2048)
	CACHE(4096)
	CACHE(8192)
	CACHE(16384)
	CACHE(32768)
	CACHE(65536)
	CACHE(131072)
#if KMALLOC_MAX_SIZE >= 262144
	CACHE(262144)
#endif
#if KMALLOC_MAX_SIZE >= 524288
	CACHE(524288)
#endif
#if KMALLOC_MAX_SIZE >= 1048576
	CACHE(1048576)
#endif
#if KMALLOC_MAX_SIZE >= 2097152
	CACHE(2097152)
#endif
#if KMALLOC_MAX_SIZE >= 4194304
	CACHE(4194304)
#endif
#if KMALLOC_MAX_SIZE >= 8388608
	CACHE(8388608)
#endif
#if KMALLOC_MAX_SIZE >= 16777216
	CACHE(16777216)
#endif
#if KMALLOC_MAX_SIZE >= 33554432
	CACHE(33554432)
#endif
全局變量malloc_sizes[]數組的各個初始化項,就是在預處理階段由CACHE()宏按上述形式展開得到的,其首地址在函數中被賦給了內部變量sizes。1458行,cache_names是一個跟malloc_sizes差不多的全局變量數組:
/*
 * Human-readable names for the general caches ("size-32",
 * "size-32(DMA)", ...), index-parallel to malloc_sizes[].
 * __initdata: only needed while the caches are being created.
 */
static struct cache_names __initdata cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
	{NULL,}
#undef CACHE
};
其首地址同樣被賦給了內部變量names。那麼,1466-1509行,調用kmem_cache_create函數爲每一個通用slab分配器初始化cache。這個函數首先根據參數確定處理新高速緩存的最佳方法(例如,是在slab 的內部還是外部包含slab 描述符)。然後它從cache_cache普通高速緩存中爲新的高速緩存分配一個高速緩存描述符kmem_cache_t(較早內核中的類型名,即struct kmem_cache):
(kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
並把這個描述符插入到高速緩存描述符的cache_chain鏈表中(當獲得了用於保護鏈表避免被同時訪問的cache_chain_mutex 互斥鎖後,插入操作完成)。具體的細節我就不多說了,有興趣的同學可以參照博文“slab分配器”
http://blog.csdn.net/yunsongice/archive/2010/01/30/5272715.aspx
以及源代碼進行分析。
上述代碼執行完畢後,slab通用分配器kmalloc函數就可以使用了。所以我們看到1514行調用kmalloc分配了一個arraycache_init結構,隨後1516~1538行代碼把cache_cache和malloc_sizes[INDEX_AC].cs_cachep所用的每CPU array指針,從啓動期的靜態initarray_cache/initarray_generic替換成剛用kmalloc分配的結構(內容先memcpy過去,自旋鎖則重新初始化,因爲不能假定自旋鎖可以通過memcpy複製),把以前初始化時期那些臨時數據替換掉。
1541~1555行調用init_list函數,把cache_cache和malloc_sizes[INDEX_AC].cs_cachep(若INDEX_AC != INDEX_L3,還包括malloc_sizes[INDEX_L3].cs_cachep)啓動期使用的靜態kmem_list3,替換成用kmalloc_node新分配的副本——內容複製過去後,讓nodelists[]指向新結構,靜態的initkmem_list3此後不再使用:
/*
 * Replace one bootstrap (static) kmem_list3 of @cachep with a copy
 * allocated on @nodeid via the now-functional kmalloc_node: copy the
 * contents, re-initialise the spinlock, then repoint the cache's
 * per-node slot at the new structure.
 */
static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
		      int nodeid)
{
	struct kmem_list3 *ptr;

	ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);
	BUG_ON(!ptr);

	memcpy(ptr, list, sizeof(struct kmem_list3));
	/*
	 * Do not assume that spinlocks can be initialized via memcpy:
	 */
	spin_lock_init(&ptr->list_lock);

	/* NOTE(review): MAKE_ALL_LISTS (definition not shown) appears to
	 * re-home the memcpy'd list_heads, which still reference the old
	 * structure — confirm against the macro. */
	MAKE_ALL_LISTS(cachep, ptr, nodeid);
	cachep->nodelists[nodeid] = ptr;
}
kmem_cache_init函數的最後一行,把全局變量g_cpucache_up設置成EARLY,slab分配器就初始化完了。