Linux內核頁表初始化

Linux在內核啓動過程中,start_kernel->setup_arch會調用如下兩個函數(prepare_page_table和map_lowmem)對頁表進行初始化和建立。

/*
 * prepare_page_table() - clear stale first-level entries before the kernel
 * builds its final mappings.
 *
 * Three ranges are wiped, one PGDIR_SIZE step at a time:
 *   1. [0, MODULES_VADDR)
 *   2. from where step 1 stopped up to PAGE_OFFSET (with CONFIG_XIP_KERNEL
 *      the start is first moved past the kernel text)
 *   3. from the virtual end of meminfo.bank[0] up to VMALLOC_END
 * The virtual range covered by the first memory bank is left untouched.
 */
static inline void prepare_page_table(void)
{
    unsigned long va = 0;

    /*
     * Clear out all the mappings below the kernel image.
     *
     * NOTE(review): the original annotation gives MODULES_VADDR as
     * 0xBF000000, i.e. 16 MiB below 0xC0000000 reserved for kernel
     * modules; the actual value is configuration dependent.
     */
    while (va < MODULES_VADDR) {
        pmd_clear(pmd_off_k(va));
        va += PGDIR_SIZE;
    }

#ifdef CONFIG_XIP_KERNEL
    /*
     * The XIP kernel is mapped in the module area -- skip over it by
     * restarting at _etext rounded up to the next PGDIR_SIZE boundary,
     * so the entries covering the kernel text are not cleared.
     */
    va = ((unsigned long)_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
#endif
    /* Without XIP this simply continues through the module area. */
    while (va < PAGE_OFFSET) {
        pmd_clear(pmd_off_k(va));
        va += PGDIR_SIZE;
    }

    /*
     * Clear out all the kernel space mappings, except for the first
     * memory bank, up to the end of the vmalloc region.
     *
     * NOTE(review): per the original annotation the first bank holds the
     * kernel image and its mapping was already set up in head.S, which is
     * why it must not be disturbed here.
     */
    va = __phys_to_virt(bank_phys_end(&meminfo.bank[0]));
    while (va < VMALLOC_END) {
        pmd_clear(pmd_off_k(va));
        va += PGDIR_SIZE;
    }
}

由於ARM採用兩級映射,pmd不佔用地址字段,pmd等同於pgd;而pmd_clear一次清除兩個一級頁表項(pmdp[0]和pmdp[1]),正好和pgd_t的定義(unsigned long [2])對應,所以for循環每次的步長爲PGDIR_SIZE(2M,即兩個1M的section)。作者認爲根本的原因是ARM linux使用了ARM的兩個段,在用戶態爲__USER_CS和__USER_DS,在內核態爲__KERNEL_CS和__KERNEL_DS。(注:這些名稱是x86的段選擇子,此說法有待查證。)

初始化頁表時也會做對應的flush操作(pmd_clear在清零頁表項之後會調用clean_pmd_entry)。

/*
 * One pgd entry is a pair of unsigned longs; pmd_clear() zeroes both
 * halves (index 0 and index 1) of such a pair.
 */
typedef unsigned long pgd_t[2];

/*
 * pmd_clear() - zero a pair of first-level entries and clean them.
 * @pmdp: pointer to the first of two consecutive pmd_t entries.
 *
 * Both entries are set to __pmd(0) and the pair is then handed to
 * clean_pmd_entry().
 *
 * The argument is captured once in a local, so an expression with side
 * effects, or a lookup such as pmd_off_k(addr), is evaluated a single
 * time instead of once per use inside the macro body.
 */
#define pmd_clear(pmdp)					\
	do {						\
		pmd_t *__pmd_clear_p = (pmdp);		\
		__pmd_clear_p[0] = __pmd(0);		\
		__pmd_clear_p[1] = __pmd(0);		\
		clean_pmd_entry(__pmd_clear_p);		\
	} while (0)

static inline pmd_t *pmd_off_k(unsigned long virt)
{
	return pmd_off(pgd_offset_k(virt), virt);
}

/*
 * Kernel shorthand: look addr up in init_mm's page directory.
 * (The same definition is repeated, identically, a few lines further down.)
 */
#define pgd_offset_k(addr)	pgd_offset(&init_mm, addr)

/*
 * Index of the page-directory entry covering addr: the address shifted
 * right by PGDIR_SHIFT.
 */
#define pgd_index(addr)		((addr) >> PGDIR_SHIFT)

/* Pointer to the page-directory entry for addr: mm->pgd plus pgd_index(addr). */
#define pgd_offset(mm, addr)	((mm)->pgd+pgd_index(addr))

/* Find an entry in the kernel page directory, i.e. the one owned by init_mm. */
#define pgd_offset_k(addr)	pgd_offset(&init_mm, addr)

/*
 * Find an entry in the second-level page table.  Here this is only a cast:
 * dir is reinterpreted as a pmd_t pointer and addr is not used at all.
 */
#define pmd_offset(dir, addr)	((pmd_t *)(dir))

/*
 * Index of addr's entry within a pte table: the page number
 * (addr >> PAGE_SHIFT) masked with PTRS_PER_PTE - 1.
 */
#define __pte_index(addr)	(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))

map_lowmem函數會爲低端物理內存的每個bank逐一調用map_memory_bank做頁表映射,map_memory_bank再調用create_mapping建立頁表。

/*
 * create_mapping() - build the kernel page-table entries for one map_desc.
 * @md: mapping descriptor; the fields read here are virtual, pfn, length
 *      and type.
 *
 * Create the page directory entries and any necessary page tables for
 * the mapping specified by `md'.  We are able to cope here with varying
 * sizes and address offsets, and we take full advantage of sections and
 * supersections.
 *
 * Nothing is mapped (a warning is printed) when the virtual address lies
 * below TASK_SIZE and is not the vectors base, or when the memory type has
 * prot_l1 == 0 and the request is not section aligned.  A request whose
 * page-aligned length is zero is ignored silently.
 */
static void __init create_mapping(struct map_desc *md)
{
	unsigned long phys, addr, length, end;
	const struct mem_type *type;
	pgd_t *pgd;

	/* Apart from the vectors page, nothing may be mapped below TASK_SIZE. */
	if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
		printk(KERN_WARNING "BUG: not creating mapping for "
		       "0x%08llx at 0x%08lx in user region\n",
		       __pfn_to_phys((u64)md->pfn), md->virtual);
		return;
	}

	/*
	 * Device and ROM mappings are not expected inside
	 * [PAGE_OFFSET, VMALLOC_END).  This is only a warning: there is no
	 * return, so the mapping is still created below.
	 */
	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
	    md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) {
		printk(KERN_WARNING "BUG: mapping for 0x%08llx at 0x%08lx "
		       "overlaps vmalloc space\n",
		       __pfn_to_phys((u64)md->pfn), md->virtual);
	}

	type = &mem_types[md->type];

	/*
	 * Catch 36-bit addresses: a pfn of 0x100000 or above is handed to
	 * create_36bit_mapping() (with 4 KiB pages that is physical memory
	 * at or beyond 4 GiB -- page size is not visible here).
	 */
	if (md->pfn >= 0x100000) {
		create_36bit_mapping(md, type);
		return;
	}

	/*
	 * Work on whole pages: mask the start address with PAGE_MASK and
	 * grow the length by the in-page offset before page-aligning it.
	 */
	addr = md->virtual & PAGE_MASK;
	phys = (unsigned long)__pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));

	/*
	 * A type with prot_l1 == 0 cannot be mapped using pages, so the
	 * address, physical address and length must all be section aligned.
	 */
	if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
		printk(KERN_WARNING "BUG: map for 0x%08lx at 0x%08lx can not "
		       "be mapped using pages, ignoring.\n",
		       __pfn_to_phys(md->pfn), addr);
		return;
	}

	/*
	 * Nothing to map.  The loop below, and the ones in
	 * alloc_init_section() and alloc_init_pte(), are do/while loops that
	 * stop on `addr != end'; entering them with addr == end would write
	 * entries far beyond the requested range instead of doing nothing.
	 */
	if (length == 0)
		return;

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		/* End of the part of [addr, end) covered by this pgd entry. */
		unsigned long next = pgd_addr_end(addr, end);

		/* This call does the actual table population. */
		alloc_init_section(pgd, addr, next, phys, type);

		phys += next - addr;
		addr = next;
	} while (pgd++, addr != end);
}

/*
 * alloc_init_section() - populate the mapping for [addr, end) under one pgd.
 * @pgd:  page-directory entry covering @addr.
 * @addr: first virtual address to map.
 * @end:  virtual address at which to stop (exclusive).
 * @phys: physical address corresponding to @addr.
 * @type: memory type supplying the protection bits.
 *
 * When @addr, @end and @phys are all section aligned, section entries are
 * written directly into the first-level table; otherwise the work is
 * delegated to alloc_init_pte().
 */
static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
				      unsigned long end, unsigned long phys,
				      const struct mem_type *type)
{
	pmd_t *entry = pmd_offset(pgd, addr);
	pmd_t *first = entry;

	/*
	 * Not section aligned: a second-level table is needed.  No need to
	 * loop; pte's aren't interested in the individual L1 entries.
	 */
	if ((addr | end | phys) & ~SECTION_MASK) {
		alloc_init_pte(entry, addr, end, __phys_to_pfn(phys), type);
		return;
	}

	/*
	 * Section mapping.  Note that PMDs refer to the individual L1
	 * entries, whereas PGDs refer to a group of L1 entries making up
	 * one logical pointer to an L2 table -- so when addr has the
	 * SECTION_SIZE bit set, start at the second entry of the pair.
	 */
	if (addr & SECTION_SIZE)
		entry++;

	for (;;) {
		/* Section entry: physical address plus section attributes. */
		*entry = __pmd(phys | type->prot_sect);
		phys += SECTION_SIZE;

		entry++;
		addr += SECTION_SIZE;
		if (addr == end)
			break;
	}

	/* The flush is issued on the entry pmd_offset() returned. */
	flush_pmd_entry(first);
}

/*
 * alloc_init_pte() - fill second-level entries for [addr, end).
 * @pmd:  first-level entry that points (or will point) at the pte table.
 * @addr: first virtual address to map.
 * @end:  virtual address at which to stop (exclusive).
 * @pfn:  page frame number backing @addr.
 * @type: memory type supplying prot_l1 and prot_pte.
 *
 * If @pmd is still empty a pte table is obtained from bootmem and hooked
 * into @pmd; afterwards one pte is written per page, with @pfn advancing
 * in step with @addr.
 */
static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
                  unsigned long end, unsigned long pfn,
                  const struct mem_type *type)
{
    pte_t *slot;

    if (pmd_none(*pmd)) {
        /* No table behind this entry yet: allocate one from bootmem. */
        pte_t *table = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));

        /* Install the table's physical address together with prot_l1. */
        __pmd_populate(pmd, __pa(table) | type->prot_l1);
    }

    slot = pte_offset_kernel(pmd, addr);
    for (;;) {
        /*
         * NOTE(review): per the original annotation set_pte_ext() is
         * platform specific and writes both the hardware and the Linux
         * view of the entry -- confirm against the CPU support code.
         */
        set_pte_ext(slot, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
        pfn++;

        slot++;
        addr += PAGE_SIZE;
        if (addr == end)
            break;
    }
}






















發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章