//+++++++++++ struct list +++++++++++++++++++++++++++++++++++++++++++
struct mm_struct ;
struct vm_area_struct
/*這個參考《深入理解LINUX內存管理》學習筆記 */
typedef struct pglist_data;
struct zone;
struct page;
//++++++++++++ Detailed breakdown of each struct follows ++++++++++++++++++++++++++++++++++
// A pointer to a task's struct mm_struct can be obtained either from
//     current->mm
// or via:
//     task = get_proc_task(file->f_path.dentry->d_inode); // get_pid_task(proc_pid(inode), PIDTYPE_PID);
//     mm   = get_task_mm(task);
struct mm_struct{
struct vm_area_struct * mmap; /* 這個鏈表鏈接了屬於這個內存描述符的所有 vm_area_struct 結構體。*/
struct rb_root mm_rb; /* 由於屬於一個內存描述符的內存區域可能非常多,爲了加快內存區域的查找以及添加刪除等操作的速度,內核用 mm_rb 表示一棵鏈接了所有內存區域的紅黑樹。*/
/*mmap 和 mm_rb 是用兩種不同的數據結構表示同一批數據。*/
struct vm_area_struct * mmap_cache; ;/* 指向最後一個引用的線性區對象*/
#ifdef CONFIG_MMU
/* 在進程地址空間中搜索有效線性地址區間的方法 */
unsigned long (*get_unmapped_area) (struct file *filp,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags);
#endif
unsigned long mmap_base; /* base of mmap area */
unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */
unsigned long task_size; /* size of task vm space */
unsigned long highest_vm_end; /* highest vma end address */
pgd_t * pgd; /* 指向頁全局目錄 */
atomic_t mm_users;
atomic_t mm_count;
/*每一個進程如果擁有一個內存描述符,則會增
加 mm_users 的計數,所有 mm_users 的計數只相當於 mm_count 的一個計數。比如
n 個 Linux 線程共享同一個內存描述符,那麼對應的內存描述符的 mm_users 計數則爲
n,而 mm_count 則可能只是 1。如果有內核執行序列想要訪問一個內存描述符,則該
執行序列先增加 mm_count 的計數,使用結束後減少 mm_count 的計數。一但
mm_count 減爲 0,表示該內存描述符沒有任何引用,則它會被內核銷燬。*/
int map_count; /* number of VMAs */ /* 線性區的個數 */
spinlock_t page_table_lock; /* Protects page tables and some counters */
struct rw_semaphore mmap_sem;
/*mmap_sem 是一個讀寫鎖,凡是需要操作內存描述符中的內存區域時,則需要先得到
相應的讀鎖或者寫鎖,使用結束後釋放該鎖*/
struct list_head mmlist; /* List of maybe swapped mm's. These are globally strung
* together off init_mm.mmlist, and are protected
* by mmlist_lock
*/
/*mm_list 字段是一個循環雙鏈表。它鏈接
了系統中所有的內存描述符。*/
unsigned long hiwater_rss; /* High-watermark of RSS usage */ /* 進程所擁有的最大頁框數 */
unsigned long hiwater_vm; /* 進程線性區中的最大頁數 */
unsigned long total_vm; /* Total pages mapped */
unsigned long locked_vm; /* Pages that have PG_mlocked set */ /*"鎖住"而不能換出的頁的個數*/
unsigned long pinned_vm; /* Refcount permanently increased */
unsigned long shared_vm; /* Shared pages (files) */ /*共享文件內存映射中的頁數*/
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */ /*可執行內存映射中的頁數*/
unsigned long stack_vm; /* VM_GROWSUP/DOWN */ /*用戶態堆棧中的頁數*/
unsigned long def_flags;
unsigned long nr_ptes; /* Page table pages */
unsigned long start_code, end_code, start_data, end_data; /*代碼段的起始地址,代碼段的最後地址,數據段的起始地址和數據段的最後的地址*/
unsigned long start_brk, brk, start_stack;/*堆的起始地址,堆的當前最後地址,用戶態堆棧的起始地址*/
unsigned long arg_start, arg_end, env_start, env_end;/*命令行參數的起始地址,命令行參數的最後地址,環境變量的起始地址,環境變量的最後地址*/
unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ /* 開始執行ELF程序時會使用到saved_auxv參數 */
/*
* Special counters, in some configurations protected by the
* page_table_lock, in other configurations by being atomic.
*/
struct mm_rss_stat rss_stat;
struct linux_binfmt *binfmt;
cpumask_var_t cpu_vm_mask_var;
/* Architecture-specific MM context */
mm_context_t context;
unsigned long flags; /* Must use atomic bitops to access the bits */
struct core_state *core_state; /* coredumping support */
#ifdef CONFIG_AIO
spinlock_t ioctx_lock;
struct hlist_head ioctx_list;
#endif
#ifdef CONFIG_MM_OWNER
/*
* "owner" points to a task that is regarded as the canonical
* user/owner of this mm. All of the following must be true in
* order for it to be changed:
*
* current == mm->owner
* current->mm != mm
* new_owner->mm == mm
* new_owner->alloc_lock is held
*/
struct task_struct __rcu *owner;
#endif
/* store ref to file /proc/<pid>/exe symlink points to */
struct file *exe_file;
#ifdef CONFIG_MMU_NOTIFIER
struct mmu_notifier_mm *mmu_notifier_mm;
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
pgtable_t pmd_huge_pte; /* protected by page_table_lock */
#endif
#ifdef CONFIG_CPUMASK_OFFSTACK
struct cpumask cpumask_allocation;
#endif
#ifdef CONFIG_NUMA_BALANCING
/*
* numa_next_scan is the next time that the PTEs will be marked
* pte_numa. NUMA hinting faults will gather statistics and migrate
* pages to new nodes if necessary.
*/
unsigned long numa_next_scan;
/* numa_next_reset is when the PTE scanner period will be reset */
unsigned long numa_next_reset;
/* Restart point for scanning and setting pte_numa */
unsigned long numa_scan_offset;
/* numa_scan_seq prevents two threads setting pte_numa */
int numa_scan_seq;
/*
* The first node a task was scheduled on. If a task runs on
* a different node than Make PTE Scan Go Now.
*/
int first_nid;
#endif
#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
/*
* An operation with batched TLB flushing is going on. Anything that
* can move process memory needs to flush the TLB when moving a
* PROT_NONE or PROT_NUMA mapped page.
*/
bool tlb_flush_pending;
#endif
struct uprobes_state uprobes_state;
};
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
struct vm_area_struct
{
struct mm_struct * vm_mm; /*指向該 VMA 屬於的內存描述符。*/
unsigned long vm_start; /*虛擬區開始的地址*/
unsigned long vm_end; /*虛擬區結束的地址,但不包括 vm_end 指向的地址,即vm_end 是虛擬內存區域的最後一個有效字節的後一個字節。*/
struct vm_area_struct *vm_next;/*鏈接虛存區*/
pgprot_t vm_page_prot; /*虛存區的保護權限*/
unsigned long vm_flags; /*虛存區的標誌*/
short vm_avl_height;/*AVL的高度*/
struct vm_area_struct * vm_avl_left; /*左虛存區節點*/
struct vm_area_struct * vm_avl_right;/*右虛存區節點*/
struct vm_area_struct *vm_next_share;
struct vm_area_struct **vm_pprev_share;
struct vm_operations_struct * vm_ops;/*對虛存區操作的函數*/
unsigned long vm_pgoff; /* 映射文件中的偏移量*/
struct file * vm_file;/*vm_file 是該內存區域對應的文件,如果內存區域是匿名的,則該字段被置爲 NULL。*/
unsigned long vm_raend;/
voidvoid * vm_private_data; /*指向內存區的私有數據*/
};
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
typedef struct pglist_data {
struct zone node_zones[MAX_NR_ZONES]; /*節點中的管理區 分別爲ZONE_DMA,ZONE_NORMAL,ZONE_HIGHMEM*/
/*list中zone的順序代表了分配內存的順序,前者分配內存失敗將會到後者的區域中分配內存;
當調用free_area_init_core()時,由mm/page_alloc.c文件中的build_zonelists()函數設置*/
struct zonelist node_zonelists[MAX_ZONELISTS];
int nr_zones; /*節點中管理區的數目,不一定爲3個,有的節點中可能不存在ZONE_DMA*/
#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
struct page *node_mem_map; /*node中的第一個page,它可以指向mem_map中的任何一個page*/
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
struct page_cgroup *node_page_cgroup;
#endif
#endif
struct bootmem_data *bdata;/*這個僅用於boot 的內存分配*/
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Must be held any time you expect node_start_pfn, node_present_pages
* or node_spanned_pages stay constant. Holding this will also
* guarantee that any pfn_valid() stays that way.
*
* Nests above zone->lock and zone->size_seqlock.
*/
spinlock_t node_size_lock;
#endif
unsigned long node_start_pfn; /*pfn是page frame number的縮寫。這個成員是用於表示node中的開始那個page在物理內存中的位置的; 該節點的起始頁框編號*/
unsigned long node_present_pages; /* total number of physical pages ;node中的真正可以使用的page數量*/
unsigned long node_spanned_pages; /* total size of physical page range, including holes ; */
int node_id; /*節點標識符,代表當前節點是系統中的第幾個節點*/
wait_queue_head_t kswapd_wait; /*頁換出進程使用的等待隊列*/
struct task_struct *kswapd; /*指向頁換出進程的進程描述符*/
int kswapd_max_order; /*kswapd將要創建的空閒塊的大小取對數的值*/
}
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
struct zone {
/* Fields commonly accessed by the page allocator */
/* zone watermarks, access with *_wmark_pages(zone) macros */
unsigned long watermark[NR_WMARK];/*該管理區的三個水平線值,min,low,high*/
/*
* When free pages are below this point, additional steps are taken
* when reading the number of free pages to avoid per-cpu counter
* drift allowing watermarks to be breached
*/
unsigned long percpu_drift_mark;
/*
* We don't know if the memory that we're going to allocate will be freeable
* or/and it will be released eventually, so to avoid totally wasting several
* GB of ram we must reserve some of the lower zone memory (otherwise we risk
* to run OOM on the lower zones despite there's tons of freeable ram
* on the higher zones). This array is recalculated at runtime if the
* sysctl_lowmem_reserve_ratio sysctl changes.
*/
/*每個管理區必須保留的頁框數*/ /*爲了防止一些代碼必須運行在低地址區域,所以事先保留一些低地址區域的內存*/
unsigned long lowmem_reserve[MAX_NR_ZONES];
#ifdef CONFIG_NUMA /*如果定義了NUMA*/
int node; /*該管理區所屬節點的節點號*/
/*
* zone reclaim becomes active if more unmapped pages exist.
*/
unsigned long min_unmapped_pages; /*當可回收的頁面數大於該變量時,管理區將回收頁面*/
unsigned long min_slab_pages; /*同上,只不過該標準用於slab回收頁面中*/
struct per_cpu_pageset *pageset[NR_CPUS]; /*每個CPU使用的頁面緩存*/
#else
struct per_cpu_pageset pageset[NR_CPUS];
#endif
/*
* free areas of different sizes
*/
spinlock_t lock; /*保護該管理區的自旋鎖*/
#ifdef CONFIG_MEMORY_HOTPLUG
/* see spanned/present_pages for more description */
seqlock_t span_seqlock;
#endif
struct free_area free_area[MAX_ORDER];/*標識出管理區中的空閒頁框塊; 頁面使用狀態的信息,以每個bit標識對應的page是否可以分配*/
#ifndef CONFIG_SPARSEMEM
/*
* Flags for a pageblock_nr_pages block. See pageblock-flags.h.
* In SPARSEMEM, this map is stored in struct mem_section
*/
unsigned long *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
ZONE_PADDING(_pad1_)
/* Fields commonly accessed by the page reclaim scanner */
spinlock_t lru_lock; /*(最近最少使用算法)的自旋鎖*/
struct zone_lru {
struct list_head list;
} lru[NR_LRU_LISTS];
struct zone_reclaim_stat reclaim_stat; /*頁面回收的狀態*/
/*管理區回收頁框時使用的計數器,記錄到上一次回收,一共掃過的頁框數*/
unsigned long pages_scanned; /* since last reclaim */
unsigned long flags; /* zone flags, see below */
/* Zone statistics */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
/*
* prev_priority holds the scanning priority for this zone. It is
* defined as the scanning priority at which we achieved our reclaim
* target at the previous try_to_free_pages() or balance_pgdat()
* invokation.
*
* We use prev_priority as a measure of how much stress page reclaim is
* under - it drives the swappiness decision: whether to unmap mapped
* pages.
*
* Access to both this field is quite racy even on uniprocessor. But
* it is expected to average out OK.
*/
int prev_priority;
/*
* The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
* this zone's LRU. Maintained by the pageout code.
*/
unsigned int inactive_ratio;
ZONE_PADDING(_pad2_)
/* Rarely used or read-mostly fields */
/*
* wait_table -- the array holding the hash table
* wait_table_hash_nr_entries -- the size of the hash table array
* wait_table_bits -- wait_table_size == (1 << wait_table_bits)
*
* The purpose of all these is to keep track of the people
* waiting for a page to become available and make them
* runnable again when possible. The trouble is that this
* consumes a lot of space, especially when so few things
* wait on pages at a given time. So instead of using
* per-page waitqueues, we use a waitqueue hash table.
*
* The bucket discipline is to sleep on the same queue when
* colliding and wake all in that wait queue when removing.
* When something wakes, it must check to be sure its page is
* truly available, a la thundering herd. The cost of a
* collision is great, but given the expected load of the
* table, they should be so rare as to be outweighed by the
* benefits from the saved space.
*
* __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
* primary users of these fields, and in mm/page_alloc.c
* free_area_init_core() performs the initialization of them.
*/
wait_queue_head_t * wait_table; /*等待一個page釋放的等待隊列哈希表。它會被wait_on_page(),unlock_page()函數使用. 用哈希表,而不用一個等待隊列的原因,防止進程長期等待資源。*/
unsigned long wait_table_hash_nr_entries; /*散列表數組的大小*/
unsigned long wait_table_bits; /*散列表數組的大小對2取log的結果*/
/*
* Discontig memory support fields.
*/
struct pglist_data *zone_pgdat; /*管理區所屬節點 ;指向這個zone所在的pglist_data對象*/
unsigned long zone_start_pfn; /*管理區的起始頁框號 zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
/*
* spanned_pages is the total pages spanned by the zone, including
* holes, which is calculated as:
* spanned_pages = zone_end_pfn - zone_start_pfn;
*
* present_pages is physical pages existing within the zone, which
* is calculated as:
* present_pages = spanned_pages - absent_pages(pages in holes);
*
* managed_pages is present pages managed by the buddy system, which
* is calculated as (reserved_pages includes pages allocated by the
* bootmem allocator):
* managed_pages = present_pages - reserved_pages;
*
* So present_pages may be used by memory hotplug or memory power
* management logic to figure out unmanaged pages by checking
* (present_pages - managed_pages). And managed_pages should be used
* by page allocator and vm scanner to calculate all kinds of watermarks
* and thresholds.
*
* Locking rules:
*
* zone_start_pfn and spanned_pages are protected by span_seqlock.
* It is a seqlock because it has to be read outside of zone->lock,
* and it is done in the main allocator path. But, it is written
* quite infrequently.
*
* The span_seq lock is declared along with zone->lock because it is
* frequently read in proximity to zone->lock. It's good to
* give them a chance of being in the same cacheline.
*
* Write access to present_pages at runtime should be protected by
* lock_memory_hotplug()/unlock_memory_hotplug(). Any reader who can't
* tolerant drift of present_pages should hold memory hotplug lock to
* get a stable value.
*
* Read access to managed_pages should be safe because it's unsigned
* long. Write access to zone->managed_pages and totalram_pages are
* protected by managed_page_count_lock at runtime. Idealy only
* adjust_managed_page_count() should be used instead of directly
* touching zone->managed_pages and totalram_pages.
*/
/*這個地方參考free_area_init_core()*/
unsigned long spanned_pages; /*管理區的大小,包括洞*/
unsigned long present_pages; /*管理區的大小,不包括洞, 可能包含dma_reserve,以及mem_map結構提所佔用的;*/
unsigned long managed_pages;
/*
* rarely used fields:
*/
const char *name; /*指向管理區的名稱,爲"DMA","NORMAL"或"HighMem"*/
}
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
struct page {
/* First double word block */
unsigned long flags; /*flags 字段存儲了頁面的狀態信息,例如:表示頁面剛被寫了數據的髒位;
該頁面是否被鎖定在內存中不充許置換到交換分區的標誌。*/
/*
爲空表示該頁屬於交換高速緩存;
mapping字段非空,且最低位是1,mapping字段中存放的是指向anon_vma描述符的指針,表示該頁爲匿名頁;
mapping字段非空,且最低位是0,mapping字段指向對應文件的address_space對象,表示該頁爲映射頁;
*/
struct address_space *mapping; /* If low bit clear, points to
* inode address_space, or NULL.
* If page mapped as anonymous
* memory, low bit is set, and
* it points to anon_vma object:
* see PAGE_MAPPING_ANON below.
*/
/* Second double word */
struct {
union {
/*這個成員根據page的使用的目的有2種可能的含義。
第一種情況:如果page是file mapping的一部分,它指明在文件中的偏移。如果page是交換緩存,則它指明在address_space所聲明的對象:swapper_space(交換地址空間)中的偏移。
第二種情況:如果這個page是一個特殊的進程將要釋放的一個page塊,則這是一個將要釋放的page塊的序列值,這個值在__free_page_ok()函數中設置。
*/
pgoff_t index; /* Our offset within mapping. */
void *freelist; /* slub/slob first free object */
bool pfmemalloc; /* If set by the page allocator,
* ALLOC_NO_WATERMARKS was set
* and the low watermark was not
* met implying that the system
* is under some pressure. The
* caller should try ensure
* this page is only used to
* free other pages.
*/
};
union {
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
/* Used for cmpxchg_double in slub */
unsigned long counters;
#else
/*
* Keep _count separate from slub cmpxchg_double data.
* As the rest of the double word is protected by
* slab_lock but _count is not.
*/
unsigned counters;
#endif
struct {
union {
/*
* Count of ptes mapped in
* mms, to show when page is
* mapped & limit reverse map
* searches.
*
* Used also for tail pages
* refcounting instead of
* _count. Tail pages cannot
* be mapped and keeping the
* tail page _count zero at
* all times guarantees
* get_page_unless_zero() will
* never succeed on tail
* pages.
*/
atomic_t _mapcount; //_mapcount字段存放引用頁框的頁表項數目,確定其是否共享;
struct { /* SLUB */
unsigned inuse:16;
unsigned objects:15;
unsigned frozen:1;
};
int units; /* SLOB */
};
atomic_t _count; /* page的訪問計數,當爲0是,說明page是空閒的,當大於0的時候,說明page被一個或多個進程真正使用或者kernel用於在等待I/O*/
/*_count 字段和_mapcount
字段都是引用計數,它們用來共同維護 page 頁面的生命期。_mapcount 表示一個頁面
擁有多少頁表項指向它,_count 被稱爲 page 的使用計數,所有的_mapcount 計數只
相當於_count 計數中的一次計數。如果內核代碼中某執行序列在訪問某個頁面時需要
確保該頁面存在,則在訪問前給_count 計數加一,訪問結束後_count 計數減一。當
_count 計數減到負數時表示沒有任何內核需要使用該頁面,則表示該頁面沒被使用。
內核代碼不應該直接訪問_count 計數,而應該使用 page_count 函數。該函數用一個
struct page 的指針做爲參數,當該頁空閒時函數返回 0,否則返回一個正數表示參數
指向的頁面正被使用。*/
};
};
};
/* Third double word block */
union {
struct list_head lru; /* Pageout list, eg. active_list
* protected by zone->lru_lock !
*/
struct { /* slub per cpu partial pages */
struct page *next; /* Next partial slab */
#ifdef CONFIG_64BIT
int pages; /* Nr of partial slabs left */
int pobjects; /* Approximate # of objects */
#else
short int pages;
short int pobjects;
#endif
};
struct list_head list; /* slobs list of pages */
struct slab *slab_page; /* slab fields */
};
/* Remainder is not double word aligned */
union {
unsigned long private; /* Mapping-private opaque data:
* usually used for buffer_heads
* if PagePrivate set; used for
* swp_entry_t if PageSwapCache;
* indicates order in the buddy
* system if PG_buddy is set.
*/
#if USE_SPLIT_PTLOCKS
spinlock_t ptl;
#endif
struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */
struct page *first_page; /* Compound tail pages */
};
/*
* On machines where all RAM is mapped into kernel address space,
* we can simply calculate the virtual address. On machines with
* highmem some memory is mapped into kernel virtual memory
* dynamically, so we need a place to store that address.
* Note that this field could be 16 bits on x86 ... ;)
*
* Architectures with slow multiplication can define
* WANT_PAGE_VIRTUAL in asm/page.h
*/
#if defined(WANT_PAGE_VIRTUAL)
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
unsigned long debug_flags; /* Use atomic bitops on this */
#endif
struct task_struct *tsk_dirty; /* task that sets this page dirty */
#ifdef CONFIG_KMEMCHECK
/*
* kmemcheck wants to track the status of each byte in a page; this
* is a pointer to such a status block. NULL if not tracked.
*/
void *shadow;
#endif
#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
int _last_nid;
#endif
#ifdef CONFIG_PAGE_OWNER
int order;
gfp_t gfp_mask;
struct stack_trace trace;
unsigned long trace_entries[8];
#endif
}
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// (Truncated note about the "mapping" field — see the comment on struct page above.)