Linux內存管理 (7)VMA操作【轉】 Linux內存管理專題

轉自:https://www.cnblogs.com/arnoldlu/p/8329279.html

專題:Linux內存管理專題

關鍵詞:VMA、vm_area_struct、查找/插入/合併VMA、紅黑樹。

 

用戶進程可以擁有3GB大小的空間,遠大於物理內存,那麼這些用戶進程的虛擬地址空間是如何管理的呢?

malloc()或mmap()操作都會要求在虛擬地址空間中分配內存塊,但這些內存在物理上往往都是離散的。

這些進程地址空間在內核中使用struct vm_area_struct數據結構來描述,簡稱VMA,也被稱爲進程地址空間或進程線性區。

1. 數據結構

struct vm_area_struct可以說是VMA的描述符,在創建之後會插入到mm->mm_rb紅黑樹和mm->mmap鏈表中。

複製代碼
/*
 * This struct defines a memory VMM memory area. There is one of these
 * per VM-area/task.  A VM area is any part of the process virtual memory
 * space that has a special rule for the page-fault handlers (ie a shared
 * library, the executable area etc).
 */
struct vm_area_struct {
    /* The first cache line has the info for VMA tree walking. */

    unsigned long vm_start;        /* Our start address within vm_mm. */--------VMA在進程地址空間的起始結束地址
    unsigned long vm_end;        /* The first byte after our end address
                       within vm_mm. */

    /* linked list of VM areas per task, sorted by address */
    struct vm_area_struct *vm_next, *vm_prev;----------------------------------VMA鏈表的前後成員

    struct rb_node vm_rb;------------------------------------------------------VMA作爲一個節點加入到紅黑樹中,每個進程的mm_struct中都有一個紅黑樹mm->mm_rb。

    /*
     * Largest free memory gap in bytes to the left of this VMA.
     * Either between this VMA and vma->vm_prev, or between one of the
     * VMAs below us in the VMA rbtree and its ->vm_prev. This helps
     * get_unmapped_area find a free area of the right size.
     */
    unsigned long rb_subtree_gap;

    /* Second cache line starts here. */

    struct mm_struct *vm_mm;    /* The address space we belong to. */--------指向VMA所屬進程的struct mm_struct結構。
    pgprot_t vm_page_prot;        /* Access permissions of this VMA. */------VMA訪問權限
    unsigned long vm_flags;        /* Flags, see mm.h. */--------------------VMA標誌位

    /*
     * For areas with an address space and backing store,
     * linkage into the address_space->i_mmap interval tree.
     */
    struct {
        struct rb_node rb;
        unsigned long rb_subtree_last;
    } shared;

    /*
     * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
     * list, after a COW of one of the file pages.    A MAP_SHARED vma
     * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
     * or brk vma (with NULL file) can only be in an anon_vma list.
     */
    struct list_head anon_vma_chain; /* Serialized by mmap_sem &-----------用於管理RMAP反向映射。
                      * page_table_lock */
    struct anon_vma *anon_vma;    /* Serialized by page_table_lock */------用於管理RMAP反向映射。

    /* Function pointers to deal with this struct. */
    const struct vm_operations_struct *vm_ops;-----------------------------VMA操作函數合集,常用於文件映射。

    /* Information about our backing store: */
    unsigned long vm_pgoff;        /* Offset (within vm_file) in PAGE_SIZE-指定文件映射的偏移量,單位是頁面。
                       units, *not* PAGE_CACHE_SIZE */
    struct file * vm_file;        /* File we map to (can be NULL). */------描述一個被映射的文件。
    void * vm_private_data;        /* was vm_pte (shared mem) */

#ifndef CONFIG_MMU
    struct vm_region *vm_region;    /* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
    struct mempolicy *vm_policy;    /* NUMA policy for the VMA */
#endif
}
複製代碼

struct  mm_struct是描述進程內存管理的核心數據結構,VMA屬於進程內存區域。在mm_struct中通過mmap鏈表和mm_rb對vm_area_struct進行管理。

struct mm_struct {
    struct vm_area_struct *mmap;        /* list of VMAs */-----單鏈表,按起始地址遞增的方式插入,所有的VMA都連接到此鏈表中。鏈表頭是mm_struct->mmap。
    struct rb_root mm_rb;--------------------------------------所有的VMA按照地址插入mm_struct->mm_rb紅黑樹中,mm_struct->mm_rb是根節點,每個進程都有一個紅黑樹。
...
}

 

2. 查找VMA

 

複製代碼
/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
    struct rb_node *rb_node;
    struct vm_area_struct *vma;

    /* Check the cache first. */
    vma = vmacache_find(mm, addr);
    if (likely(vma))
        return vma;

    rb_node = mm->mm_rb.rb_node;
    vma = NULL;

    while (rb_node) {
        struct vm_area_struct *tmp;

        tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

        if (tmp->vm_end > addr) {
            vma = tmp;
            if (tmp->vm_start <= addr)
                break;
            rb_node = rb_node->rb_left;
        } else
            rb_node = rb_node->rb_right;
    }

    if (vma)
        vmacache_update(addr, vma);
    return vma;
}

struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
{
    int i;

    if (!vmacache_valid(mm))
        return NULL;

    count_vm_vmacache_event(VMACACHE_FIND_CALLS);

    for (i = 0; i < VMACACHE_SIZE; i++) {
        struct vm_area_struct *vma = current->vmacache[i];

        if (!vma)
            continue;
        if (WARN_ON_ONCE(vma->vm_mm != mm))
            break;
        if (vma->vm_start <= addr && vma->vm_end > addr) {
            count_vm_vmacache_event(VMACACHE_FIND_HITS);
            return vma;
        }
    }

    return NULL;
}

void vmacache_update(unsigned long addr, struct vm_area_struct *newvma)
{
    if (vmacache_valid_mm(newvma->vm_mm))
        current->vmacache[VMACACHE_HASH(addr)] = newvma;
}
複製代碼

 

 

 

 

 

 

3. 插入VMA

複製代碼
int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
    struct vm_area_struct *prev;
    struct rb_node **rb_link, *rb_parent;

    /*
     * The vm_pgoff of a purely anonymous vma should be irrelevant
     * until its first write fault, when page's anon_vma and index
     * are set.  But now set the vm_pgoff it will almost certainly
     * end up with (unless mremap moves it elsewhere before that
     * first wfault), so /proc/pid/maps tells a consistent story.
     *
     * By setting it to reflect the virtual start address of the
     * vma, merges and splits can happen in a seamless way, just
     * using the existing file pgoff checks and manipulations.
     * Similarly in do_mmap_pgoff and in do_brk.
     */
    if (!vma->vm_file) {
        BUG_ON(vma->anon_vma);
        vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
    }
    if (find_vma_links(mm, vma->vm_start, vma->vm_end,
               &prev, &rb_link, &rb_parent))
        return -ENOMEM;
    if ((vma->vm_flags & VM_ACCOUNT) &&
         security_vm_enough_memory_mm(mm, vma_pages(vma)))
        return -ENOMEM;

    vma_link(mm, vma, prev, rb_link, rb_parent);
    return 0;
}
複製代碼

 

 

複製代碼
static int find_vma_links(struct mm_struct *mm, unsigned long addr,
        unsigned long end, struct vm_area_struct **pprev,
        struct rb_node ***rb_link, struct rb_node **rb_parent)
{
    struct rb_node **__rb_link, *__rb_parent, *rb_prev;

    __rb_link = &mm->mm_rb.rb_node;
    rb_prev = __rb_parent = NULL;

    while (*__rb_link) {
        struct vm_area_struct *vma_tmp;

        __rb_parent = *__rb_link;
        vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

        if (vma_tmp->vm_end > addr) {
            /* Fail if an existing vma overlaps the area */
            if (vma_tmp->vm_start < end)
                return -ENOMEM;
            __rb_link = &__rb_parent->rb_left;
        } else {
            rb_prev = __rb_parent;
            __rb_link = &__rb_parent->rb_right;
        }
    }

    *pprev = NULL;
    if (rb_prev)
        *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
    *rb_link = __rb_link;
    *rb_parent = __rb_parent;
    return 0;
}
複製代碼

 

 

複製代碼
static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
            struct vm_area_struct *prev, struct rb_node **rb_link,
            struct rb_node *rb_parent)
{
    struct address_space *mapping = NULL;

    if (vma->vm_file) {
        mapping = vma->vm_file->f_mapping;
        i_mmap_lock_write(mapping);
    }

    __vma_link(mm, vma, prev, rb_link, rb_parent);
    __vma_link_file(vma);

    if (mapping)
        i_mmap_unlock_write(mapping);

    mm->map_count++;
    validate_mm(mm);
}
複製代碼

 

 

4. 合併VMA

複製代碼
/*
 * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
 * whether that can be merged with its predecessor or its successor.
 * Or both (it neatly fills a hole).
 *
 * In most cases - when called for mmap, brk or mremap - [addr,end) is
 * certain not to be mapped by the time vma_merge is called; but when
 * called for mprotect, it is certain to be already mapped (either at
 * an offset within prev, or at the start of next), and the flags of
 * this area are about to be changed to vm_flags - and the no-change
 * case has already been eliminated.
 *
 * The following mprotect cases have to be considered, where AAAA is
 * the area passed down from mprotect_fixup, never extending beyond one
 * vma, PPPPPP is the prev vma specified, and NNNNNN the next vma after:
 *
 *     AAAA             AAAA                AAAA          AAAA
 *    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPNNNNXXXX
 *    cannot merge    might become    might become    might become
 *                    PPNNNNNNNNNN    PPPPPPPPPPNN    PPPPPPPPPPPP 6 or
 *    mmap, brk or    case 4 below    case 5 below    PPPPPPPPXXXX 7 or
 *    mremap move:                                    PPPPNNNNNNNN 8
 *        AAAA
 *    PPPP    NNNN    PPPPPPPPPPPP    PPPPPPPPNNNN    PPPPNNNNNNNN
 *    might become    case 1 below    case 2 below    case 3 below
 *
 * Odd one out? Case 8, because it extends NNNN but needs flags of XXXX:
 * mprotect_fixup updates vm_flags & vm_page_prot on successful return.
 */
struct vm_area_struct *vma_merge(struct mm_struct *mm,
            struct vm_area_struct *prev, unsigned long addr,
            unsigned long end, unsigned long vm_flags,
            struct anon_vma *anon_vma, struct file *file,
            pgoff_t pgoff, struct mempolicy *policy)
{
    pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
    struct vm_area_struct *area, *next;
    int err;

    /*
     * We later require that vma->vm_flags == vm_flags,
     * so this tests vma->vm_flags & VM_SPECIAL, too.
     */
    if (vm_flags & VM_SPECIAL)
        return NULL;

    if (prev)
        next = prev->vm_next;
    else
        next = mm->mmap;
    area = next;
    if (next && next->vm_end == end)        /* cases 6, 7, 8 */
        next = next->vm_next;

    /*
     * Can it merge with the predecessor?
     */
    if (prev && prev->vm_end == addr &&
            mpol_equal(vma_policy(prev), policy) &&
            can_vma_merge_after(prev, vm_flags,
                        anon_vma, file, pgoff)) {
        /*
         * OK, it can.  Can we now merge in the successor as well?
         */
        if (next && end == next->vm_start &&
                mpol_equal(policy, vma_policy(next)) &&
                can_vma_merge_before(next, vm_flags,
                    anon_vma, file, pgoff+pglen) &&
                is_mergeable_anon_vma(prev->anon_vma,
                              next->anon_vma, NULL)) {
                            /* cases 1, 6 */
            err = vma_adjust(prev, prev->vm_start,
                next->vm_end, prev->vm_pgoff, NULL);
        } else                    /* cases 2, 5, 7 */
            err = vma_adjust(prev, prev->vm_start,
                end, prev->vm_pgoff, NULL);
        if (err)
            return NULL;
        khugepaged_enter_vma_merge(prev, vm_flags);
        return prev;
    }

    /*
     * Can this new request be merged in front of next?
     */
    if (next && end == next->vm_start &&
            mpol_equal(policy, vma_policy(next)) &&
            can_vma_merge_before(next, vm_flags,
                    anon_vma, file, pgoff+pglen)) {
        if (prev && addr < prev->vm_end)    /* case 4 */
            err = vma_adjust(prev, prev->vm_start,
                addr, prev->vm_pgoff, NULL);
        else                    /* cases 3, 8 */
            err = vma_adjust(area, addr, next->vm_end,
                next->vm_pgoff - pglen, NULL);
        if (err)
            return NULL;
        khugepaged_enter_vma_merge(area, vm_flags);
        return area;
    }

    return NULL;
}
複製代碼

 

 

 

 

 

 

5. 紅黑樹例子

 

聯繫方式:[email protected]
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章