pg_data_t數據結構linux2.6.37

pg_data_t數據結構2.6.37

這個數據結構用於非一致內存訪問(NUMA)機器,表示比zone更高層次的內存區。

關於NUMA的文章

http://www.ibm.com/developerworks/cn/linux/l-numa/

http://blog.chinaunix.net/uid-7295895-id-3076420.html

NUMA有多個節點,而每個節點內,訪問內存的時間是相同的,不同的節點,訪問內存的時間可以不同。

在NUMA機器上,每個NUMA 節點(node)有一個pg_data_t來描述它的內存佈局。

對於UMA機器,只有一個節點,此時只使用一個名為contig_page_data的靜態pg_data_t結構來描述全部內存:

/* Single-node (UMA) form: nid is ignored and every lookup yields &contig_page_data. */
#define NODE_DATA(nid) (&contig_page_data)


每個節點(node)又可以分成若干個區(zone),如ZONE_DMA、ZONE_NORMAL、ZONE_HIGHMEM。

每個區有自己的特殊用途,如ZONE_DMA是低端的物理內存,如ISA設備需要用到它;

ZONE_NORMAL的地址可以直接映射到線性地址空間中。

不過對於ARM來說,ZONE_DMA和ZONE_NORMAL應該沒有啥區別。


/*

 * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM

 * (mostly NUMA machines?) to denote a higher-level memory zone than the

 * zone denotes.

 *

 * On NUMA machines, each NUMA node would have a pg_data_t to describe

 * its memory layout.

 *

 * Memory statistics and page replacement data structures are maintained on a

 * per-zone basis.

 */

struct bootmem_data;

/*
 * Per-node memory descriptor: holds the node's zones and zonelists, the
 * page-frame range the node covers, and the node's kswapd bookkeeping.
 */
typedef struct pglist_data {

/* The zones that belong to this node. */
struct zone node_zones[MAX_NR_ZONES];

/* Order in which zones are considered when allocating memory. */
struct zonelist node_zonelists[MAX_ZONELISTS];

int nr_zones; /* number of zones in this node */

#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */

/* First entry of the struct page array (mem_map) covering this node's pages. */
struct page *node_mem_map;

#ifdef CONFIG_CGROUP_MEM_RES_CTLR

struct page_cgroup *node_page_cgroup;

#endif

#endif

#ifndef CONFIG_NO_BOOTMEM

struct bootmem_data *bdata;/* points to the boot-time memory allocator data */

#endif

#ifdef CONFIG_MEMORY_HOTPLUG

/*

 * Must be held any time you expect node_start_pfn, node_present_pages

 * or node_spanned_pages stay constant.  Holding this will also

 * guarantee that any pfn_valid() stays that way.

 *

 * Nests above zone->lock and zone->size_seqlock.

 */

spinlock_t node_size_lock;

#endif

unsigned long node_start_pfn; /* first page frame number (PFN) of this node */

unsigned long node_present_pages; /* total number of physical pages in this node */

unsigned long node_spanned_pages; /* total size of physical page
                                     range, including holes */

int node_id;              /* node id (NID) of this node */

wait_queue_head_t kswapd_wait;/* wait queue used for this node's kswapdN */

struct task_struct *kswapd;   /* the kswapdN task serving this node */

int kswapd_max_order;         /* NOTE(review): undocumented in the original notes;
                                 presumably the largest allocation order kswapd
                                 was asked to handle - confirm */

} pg_data_t;


node_zones:

該節點的區。

zone_type 區類型

/*
 * Types of zone a node's memory can be divided into. Which enumerators
 * exist depends on the CONFIG_* options tested below.
 */
enum zone_type {

#ifdef CONFIG_ZONE_DMA

/*

* ZONE_DMA is used when there are devices that are not able

* to do DMA to all of addressable memory (ZONE_NORMAL). Then we

* carve out the portion of memory that is needed for these devices.

* The range is arch specific, i.e. it is determined by the architecture.

*

* Some examples

*

* Architecture Limit

* ---------------------------

* parisc, ia64, sparc <4G

* s390 <2G

* arm Various

* alpha Unlimited or 0-16MB.

*

* i386, x86_64 and multiple other arches

* <16M.

*/

ZONE_DMA,

#endif

#ifdef CONFIG_ZONE_DMA32

/*

* x86_64 needs two ZONE_DMAs because it supports devices that are

* only able to do DMA to the lower 16M but also 32 bit devices that

* can only do DMA areas below 4G.

*/

ZONE_DMA32,

#endif

/*

* Normal addressable memory is in ZONE_NORMAL. DMA operations can be

* performed on pages in ZONE_NORMAL if the DMA devices support

* transfers to all addressable memory.

* In other words: when a DMA device can reach all addressable memory,

* its DMA operations may target pages in ZONE_NORMAL.

*/

ZONE_NORMAL,

#ifdef CONFIG_HIGHMEM

/*

* A memory area that is only addressable by the kernel through

* mapping portions into its own address space. This is for example

* used by i386 to allow the kernel to address the memory beyond

* 900MB. The kernel will set up special mappings (page

* table entries on i386) for each page that the kernel needs to

* access.

*/

ZONE_HIGHMEM,

#endif

/* NOTE(review): not described in these notes; presumably the zone for movable pages - confirm. */
ZONE_MOVABLE,

/* Not a zone: as the last enumerator, its value equals the number of zone types defined above. */
__MAX_NR_ZONES

};



node_zonelists:內存分配時,區的順序

初始化時的調用路徑:start_kernel() ==> build_all_zonelists(NULL) ==> build_zonelists()


node_mem_map: 指向該node的struct page數組(即mem_map數組)的第一個元素,該數組描述了該node的所有頁;

free_area_init_node=>alloc_node_mem_map

mem_map = NODE_DATA(0)->node_mem_map;



發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章