5.8.2 Initializing the slab allocator

Back in mm_init(), the next function on the path is kmem_cache_init(), another key routine: it initializes the kernel's slab allocation subsystem. The function lives in mm/slab.c:

 

1375 void __init kmem_cache_init(void)
1376 {
1377        size_t left_over;
1378        struct cache_sizes *sizes;
1379        struct cache_names *names;
1380        int i;
1381        int order;
1382        int node;
1383
1384        if (num_possible_nodes() == 1)
1385                use_alien_caches = 0;
1386
1387        for (i = 0; i < NUM_INIT_LISTS; i++) {
1388                kmem_list3_init(&initkmem_list3[i]);
1389                if (i < MAX_NUMNODES)
1390                        cache_cache.nodelists[i] = NULL;
1391        }
1392        set_up_list3s(&cache_cache, CACHE_CACHE);
1393
……
1398        if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
1399                slab_break_gfp_order = BREAK_GFP_ORDER_HI;
1400
……
1420
1421        node = numa_node_id();
1422
1423        /* 1) create the cache_cache */
1424        INIT_LIST_HEAD(&cache_chain);
1425        list_add(&cache_cache.next, &cache_chain);
1426        cache_cache.colour_off = cache_line_size();
1427        cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
1428        cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
1429
……
1434        cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
1435                                 nr_node_ids * sizeof(struct kmem_list3 *);
1436 #if DEBUG
1437        cache_cache.obj_size = cache_cache.buffer_size;
1438 #endif
1439        cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
1440                                        cache_line_size());
1441        cache_cache.reciprocal_buffer_size =
1442                reciprocal_value(cache_cache.buffer_size);
1443
1444        for (order = 0; order < MAX_ORDER; order++) {
1445                cache_estimate(order, cache_cache.buffer_size,
1446                        cache_line_size(), 0, &left_over, &cache_cache.num);
1447                if (cache_cache.num)
1448                        break;
1449        }
1450        BUG_ON(!cache_cache.num);
1451        cache_cache.gfporder = order;
1452        cache_cache.colour = left_over / cache_cache.colour_off;
1453        cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
1454                                      sizeof(struct slab), cache_line_size());
1455
1456        /* 2+3) create the kmalloc caches */
1457        sizes = malloc_sizes;
1458        names = cache_names;
1459
……
1466        sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
1467                                        sizes[INDEX_AC].cs_size,
1468                                        ARCH_KMALLOC_MINALIGN,
1469                                        ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1470                                        NULL);
1471
1472        if (INDEX_AC != INDEX_L3) {
1473                sizes[INDEX_L3].cs_cachep =
1474                        kmem_cache_create(names[INDEX_L3].name,
1475                                sizes[INDEX_L3].cs_size,
1476                                ARCH_KMALLOC_MINALIGN,
1477                                ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1478                                NULL);
1479        }
1480
1481        slab_early_init = 0;
1482
1483        while (sizes->cs_size != ULONG_MAX) {
……
1491                if (!sizes->cs_cachep) {
1492                        sizes->cs_cachep = kmem_cache_create(names->name,
1493                                        sizes->cs_size,
1494                                        ARCH_KMALLOC_MINALIGN,
1495                                        ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1496                                        NULL);
1497                }
1498 #ifdef CONFIG_ZONE_DMA
1499                sizes->cs_dmacachep = kmem_cache_create(
1500                                        names->name_dma,
1501                                        sizes->cs_size,
1502                                        ARCH_KMALLOC_MINALIGN,
1503                                        ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1504                                                SLAB_PANIC,
1505                                        NULL);
1506 #endif
1507                sizes++;
1508                names++;
1509        }
1510        /* 4) Replace the bootstrap head arrays */
1511        {
1512                struct array_cache *ptr;
1513
1514                ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1515
1516                BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
1517                memcpy(ptr, cpu_cache_get(&cache_cache),
1518                       sizeof(struct arraycache_init));
1519                /*
1520                 * Do not assume that spinlocks can be initialized via memcpy:
1521                 */
1522                spin_lock_init(&ptr->lock);
1523
1524                cache_cache.array[smp_processor_id()] = ptr;
1525
1526                ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1527
1528                BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
1529                       != &initarray_generic.cache);
1530                memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
1531                       sizeof(struct arraycache_init));
1532                /*
1533                 * Do not assume that spinlocks can be initialized via memcpy:
1534                 */
1535                spin_lock_init(&ptr->lock);
1536
1537                malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
1538                    ptr;
1539        }
1540        /* 5) Replace the bootstrap kmem_list3's */
1541        {
1542                int nid;
1543
1544                for_each_online_node(nid) {
1545                        init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
1546
1547                        init_list(malloc_sizes[INDEX_AC].cs_cachep,
1548                                  &initkmem_list3[SIZE_AC + nid], nid);
1549
1550                        if (INDEX_AC != INDEX_L3) {
1551                                init_list(malloc_sizes[INDEX_L3].cs_cachep,
1552                                          &initkmem_list3[SIZE_L3 + nid], nid);
1553                        }
1554                }
1555        }
1556
1557        g_cpucache_up = EARLY;
1558 }

 

A number of lines have been cut from the listing above; don't worry, they are all comments. At line 1387, the macro NUM_INIT_LISTS is defined as

#define NUM_INIT_LISTS (3 * MAX_NUMNODES)

which is 3 on a non-NUMA configuration (MAX_NUMNODES == 1). The loop therefore runs three times, calling kmem_list3_init() to initialize the global array initkmem_list3[]. The array is defined in the same file:

struct kmem_list3 {
       struct list_head slabs_partial; /* partial list first, better asm code */
       struct list_head slabs_full;
       struct list_head slabs_free;
       unsigned long free_objects;
       unsigned int free_limit;
       unsigned int colour_next;      /* Per-node cache coloring */
       spinlock_t list_lock;
       struct array_cache *shared;    /* shared per node */
       struct array_cache **alien;    /* on other nodes */
       unsigned long next_reap;       /* updated without locking */
       int free_touched;           /* updated without locking */
};

struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];

 

The function that initializes each of its elements is equally simple, also in the same file:

 

static void kmem_list3_init(struct kmem_list3 *parent)
{
       INIT_LIST_HEAD(&parent->slabs_full);
       INIT_LIST_HEAD(&parent->slabs_partial);
       INIT_LIST_HEAD(&parent->slabs_free);
       parent->shared = NULL;
       parent->alien = NULL;
       parent->colour_next = 0;
       spin_lock_init(&parent->list_lock);
       parent->free_objects = 0;
       parent->free_touched = 0;
}

 

Then, at line 1392, set_up_list3s() is called to initialize the nodelists field of the global variable cache_cache. cache_cache matters a great deal: it is the core data structure of the slab subsystem, defined as follows:

 

static struct kmem_cache cache_cache = {
       .batchcount = 1,
       .limit = BOOT_CPUCACHE_ENTRIES,
       .shared = 1,
       .buffer_size = sizeof(struct kmem_cache),
       .name = "kmem_cache",
};

 

struct kmem_cache {
/* 1) per-cpu data, touched during every alloc/free */
       struct array_cache *array[NR_CPUS];
/* 2) Cache tunables. Protected by cache_chain_mutex */
       unsigned int batchcount;
       unsigned int limit;
       unsigned int shared;

       unsigned int buffer_size;
       u32 reciprocal_buffer_size;
/* 3) touched by every alloc & free from the backend */

       unsigned int flags;         /* constant flags */
       unsigned int num;          /* # of objs per slab */

/* 4) cache_grow/shrink */
       /* order of pgs per slab (2^n) */
       unsigned int gfporder;

       /* force GFP flags, e.g. GFP_DMA */
       gfp_t gfpflags;

       size_t colour;                /* cache colouring range */
       unsigned int colour_off; /* colour offset */
       struct kmem_cache *slabp_cache;
       unsigned int slab_size;
       unsigned int dflags;              /* dynamic flags */

       /* constructor func */
       void (*ctor)(void *obj);

/* 5) cache creation/removal */
       const char *name;
       struct list_head next;

/* 6) statistics */
#ifdef CONFIG_DEBUG_SLAB
…… slab-debugging fields omitted ……
#endif /* CONFIG_DEBUG_SLAB */
…… some comments omitted ……
       struct kmem_list3 *nodelists[MAX_NUMNODES];
       /*
        * Do not add fields after nodelists[]
        */
};

 

The comments on struct kmem_cache are quite detailed and worth a careful read. set_up_list3s() is in the same file:

 

#define CACHE_CACHE 0

static void __init set_up_list3s(struct kmem_cache *cachep, int index)
{
       int node;

       for_each_online_node(node) {
              cachep->nodelists[node] = &initkmem_list3[index + node];
              cachep->nodelists[node]->next_reap = jiffies +
                  REAPTIMEOUT_LIST3 +
                  ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
       }
}

 

 

It is straightforward: on a non-NUMA system it simply points cache_cache.nodelists[0] at the just-initialized initkmem_list3[0]. On a NUMA system, each online node has its own entry in initkmem_list3[], and the corresponding element of cache_cache's nodelists[] array is made to point to it.
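The index passed in follows the bootstrap slot layout of initkmem_list3[]. For reference, in this generation of mm/slab.c the slot macros look roughly like the following sketch (CACHE_CACHE appears in the listing above; SIZE_AC and SIZE_L3 show up later at lines 1548 and 1552):

#define CACHE_CACHE 0                   /* bootstrap lists for cache_cache itself */
#define SIZE_AC     MAX_NUMNODES        /* bootstrap lists for the array_cache-sized kmalloc cache */
#define SIZE_L3     (2 * MAX_NUMNODES)  /* bootstrap lists for the kmem_list3-sized kmalloc cache */

With one slot per node for each of these three bootstrap caches, NUM_INIT_LISTS = 3 * MAX_NUMNODES covers them all.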

 

Moving on, line 1424 initializes a global kernel list head, cache_chain, which is nothing more than a list_head, defined in the same file:

static struct list_head cache_chain;

list_add() then links cache_cache onto this chain, and lines 1426~1454 initialize the remaining fields of cache_cache.
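Every cache created later via kmem_cache_create() is also linked onto cache_chain, so the full set of caches can be walked with the usual list iterators. A hypothetical debug sketch (not in the kernel source; it would have to live in mm/slab.c and hold cache_chain_mutex, since cache_chain is static there):

struct kmem_cache *cachep;

list_for_each_entry(cachep, &cache_chain, next)
        printk(KERN_DEBUG "%s: %u bytes/object, %u objects/slab\n",
               cachep->name, cachep->buffer_size, cachep->num);

This is essentially what /proc/slabinfo does when it reports per-cache statistics.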

 

Line 1457 brings us to another important global variable, malloc_sizes. It governs the initialization of the general-purpose (kmalloc) slab caches. The distinction between dedicated and general-purpose slab caches is a core concept of Linux kernel memory management; readers not familiar with it should review that material first. The variable is defined at line 570 of the same file:

 

570 struct cache_sizes malloc_sizes[] = {
571 #define CACHE(x) { .cs_size = (x) },
572 #include <linux/kmalloc_sizes.h>
573     CACHE(ULONG_MAX)
574 #undef CACHE
575 };

 

cache_sizes is the following structure:

struct cache_sizes {
       size_t                  cs_size;
       struct kmem_cache *cs_cachep;
#ifdef CONFIG_ZONE_DMA
       struct kmem_cache *cs_dmacachep;
#endif
};

 

All elements of the malloc_sizes[] array thus come from the file linux/kmalloc_sizes.h; here are its full contents:

#if (PAGE_SIZE == 4096)
       CACHE(32)
#endif
       CACHE(64)
#if L1_CACHE_BYTES < 64
       CACHE(96)
#endif
       CACHE(128)
#if L1_CACHE_BYTES < 128
       CACHE(192)
#endif
       CACHE(256)
       CACHE(512)
       CACHE(1024)
       CACHE(2048)
       CACHE(4096)
       CACHE(8192)
       CACHE(16384)
       CACHE(32768)
       CACHE(65536)
       CACHE(131072)
#if KMALLOC_MAX_SIZE >= 262144
       CACHE(262144)
#endif
#if KMALLOC_MAX_SIZE >= 524288
       CACHE(524288)
#endif
#if KMALLOC_MAX_SIZE >= 1048576
       CACHE(1048576)
#endif
#if KMALLOC_MAX_SIZE >= 2097152
       CACHE(2097152)
#endif
#if KMALLOC_MAX_SIZE >= 4194304
       CACHE(4194304)
#endif
#if KMALLOC_MAX_SIZE >= 8388608
       CACHE(8388608)
#endif
#if KMALLOC_MAX_SIZE >= 16777216
       CACHE(16777216)
#endif
#if KMALLOC_MAX_SIZE >= 33554432
       CACHE(33554432)
#endif
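Each of these size classes becomes one general-purpose cache (plus a DMA twin when CONFIG_ZONE_DMA is set). At allocation time, kmalloc() walks malloc_sizes[] to find the first class at least as large as the request, along the lines of this simplified sketch of __find_general_cachep() (the real function also handles a zero size and caches that have not been created yet):

static struct kmem_cache *pick_general_cache(size_t size, gfp_t flags)
{
        struct cache_sizes *csizep = malloc_sizes;

        /* smallest general cache whose cs_size fits the request */
        while (size > csizep->cs_size)
                csizep++;
#ifdef CONFIG_ZONE_DMA
        if (unlikely(flags & GFP_DMA))
                return csizep->cs_dmacachep;
#endif
        return csizep->cs_cachep;
}

The CACHE(ULONG_MAX) sentinel at the end of malloc_sizes[] guarantees that the loop terminates.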

 

The malloc_sizes array is therefore laid out in the form above when vmlinux is built, and its address is assigned to the local variable sizes in kmem_cache_init(). At line 1458, cache_names is a global array much like malloc_sizes:

static struct cache_names __initdata cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
       {NULL,}
#undef CACHE
};
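Both arrays reuse the same kmalloc_sizes.h entries through two different definitions of the CACHE(x) macro. With PAGE_SIZE == 4096, for instance, the first entry CACHE(32) expands roughly to (a sketch of the preprocessor output):

{ .cs_size = (32) },                                   /* in malloc_sizes[] */
{ .name = "size-32", .name_dma = "size-32(DMA)" },     /* in cache_names[] */

which is why the general caches appear as "size-32", "size-64", and so on in /proc/slabinfo.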

 

The address of cache_names is assigned to the local variable names in the same way. Then, lines 1466-1509 call kmem_cache_create() to set up the cache of every general-purpose slab allocator. This function first decides from its parameters how best to handle the new cache (for example, whether the slab descriptor should live inside or outside the slab itself). It then allocates a cache descriptor (a kmem_cache_t) for the new cache from the cache_cache cache:

(kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);

and inserts this descriptor into the cache_chain list of cache descriptors (the insertion is done after taking the cache_chain_mutex that protects the list from concurrent access). I will not go into further detail here; interested readers can refer to the post "slab分配器" at
http://blog.csdn.net/yunsongice/archive/2010/01/30/5272715.aspx
and to the source code itself.
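To see kmem_cache_create() from the consumer side, here is a hedged usage sketch, not taken from the kernel source; struct foo, foo_cachep and foo_ctor are made-up names for the example:

/* a dedicated cache for a hypothetical object type */
struct foo {
        int a;
        int b;
};

static struct kmem_cache *foo_cachep;

/* constructor: called for each object when a new slab is populated */
static void foo_ctor(void *obj)
{
        struct foo *f = obj;

        f->a = 0;
        f->b = 0;
}

static int __init foo_cache_init(void)
{
        foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
                                       0, SLAB_HWCACHE_ALIGN, foo_ctor);
        if (!foo_cachep)
                return -ENOMEM;
        return 0;
}

Objects are then obtained with kmem_cache_alloc(foo_cachep, GFP_KERNEL) and released with kmem_cache_free(foo_cachep, obj).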

 

Once the kmem_cache_create() calls above have completed, the general-purpose slab allocator, i.e. kmalloc(), is ready for use. That is why line 1514 can call kmalloc() to allocate an arraycache_init structure; lines 1516~1538 then set up the per-CPU cache_cache.array (and the array of the INDEX_AC kmalloc cache), replacing the static bootstrap arrays that were used during early initialization.
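cpu_cache_get(), used in the BUG_ON() and memcpy() calls of step 4 above, just returns the array_cache of the executing CPU; in mm/slab.c it is essentially:

static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
        return cachep->array[smp_processor_id()];
}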

 

Lines 1541~1555 call init_list() to replace the bootstrap kmem_list3 structures of cache_cache and of malloc_sizes[INDEX_AC].cs_cachep (and of the INDEX_L3 cache when it is distinct): each nodelists[] entry is copied into freshly kmalloc'ed memory and repointed there, since the static initkmem_list3[] slots are no longer needed afterwards:

 

static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
                     int nodeid)
{
       struct kmem_list3 *ptr;

       ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);
       BUG_ON(!ptr);

       memcpy(ptr, list, sizeof(struct kmem_list3));
       /*
        * Do not assume that spinlocks can be initialized via memcpy:
        */
       spin_lock_init(&ptr->list_lock);

       MAKE_ALL_LISTS(cachep, ptr, nodeid);
       cachep->nodelists[nodeid] = ptr;
}

 

In the last line of kmem_cache_init(), the global variable g_cpucache_up is set to EARLY, and the bootstrap of the slab allocator is complete.
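For reference, g_cpucache_up tracks how far the slab bootstrap has progressed; in this generation of mm/slab.c it is an enum roughly like the following (the exact states may differ slightly between versions), and it only reaches FULL later, once the per-CPU caches have been fully set up:

static enum {
        NONE,
        PARTIAL_AC,
        PARTIAL_L3,
        EARLY,
        FULL
} g_cpucache_up;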

 
