思考:如何在PowerPC架構e500芯片實現low_memory的大小控制?
內存區域zone
三個內存區域zone,分別爲ZONE_DMA,ZONE_NORMAL和ZONE_HIGHMEM,在e500上ZONE_NORMAL實際劃分的內存爲空,所以所有低端內存(low_memory)都劃分到ZONE_DMA中;
內存區域的劃分信息保存在max_zone_pfns中,其定義如下:
/*
 * Per-zone upper page-frame-number limits, one entry per zone
 * (MAX_NR_ZONES entries in total).
 *
 * The GNU range designator "[first ... last]" sets every entry to ~0UL
 * (all bits set).  NOTE(review): presumably this means "no limit applied
 * yet" until limit_zone_pfn() lowers an entry - confirm against that helper.
 */
static unsigned long max_zone_pfns[MAX_NR_ZONES] = {
[0 ... MAX_NR_ZONES - 1] = ~0UL
};
每個zone都有一個max_zone_pfn對應來限定內存頁幀上限,同時作爲更高內存區域內存頁幀的下限。在paging_init函數中計算各個zone區域的劃分信息:
/*
 * paging_init() - finish the zone layout and hand it to the page allocator.
 *
 * The page tables themselves have already been set up by the time this
 * runs.  What remains is:
 *   - on 32-bit, map every page of the fixmap range up to FIX_HOLE;
 *   - with HIGHMEM, map PKMAP_BASE and record the kmap PTEs/protection;
 *   - log the top of RAM, the total RAM and the size of any hole;
 *   - clamp the per-zone PFN limits and initialise the free areas.
 */
void __init paging_init(void)
{
	unsigned long long total_ram = memblock_phys_mem_size();
	phys_addr_t top_of_ram = memblock_end_of_DRAM();
	/* Difference between the end of DRAM and the RAM actually present. */
	unsigned long long hole_bytes = top_of_ram - total_ram;
	enum zone_type top_zone;

#ifdef CONFIG_PPC32
	unsigned long fix_va = __fix_to_virt(__end_of_fixed_addresses - 1);
	unsigned long fix_end = __fix_to_virt(FIX_HOLE);

	/* Walk the fixmap range one page at a time. */
	while (fix_va < fix_end) {
		map_page(fix_va, 0, 0);	/* XXX gross */
		fix_va += PAGE_SIZE;
	}
#endif

#ifdef CONFIG_HIGHMEM
	/* PKMAP_BASE must be mapped before its PTE can be looked up. */
	map_page(PKMAP_BASE, 0, 0);	/* XXX gross */
	pkmap_page_table = virt_to_kpte(PKMAP_BASE);

	kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
	kmap_prot = PAGE_KERNEL;
#endif /* CONFIG_HIGHMEM */

	printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%llx\n",
	       (unsigned long long)top_of_ram, total_ram);
	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
	       (long int)(hole_bytes >> 20));

	/*
	 * With HIGHMEM the normal zone stops at the end of low memory and the
	 * highmem zone becomes the topmost one; otherwise the normal zone is
	 * the topmost zone.
	 */
#ifdef CONFIG_HIGHMEM
	top_zone = ZONE_HIGHMEM;
	limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT);
#else
	top_zone = ZONE_NORMAL;
#endif

	/* The topmost zone always ends at the top of RAM. */
	limit_zone_pfn(top_zone, top_of_ram >> PAGE_SHIFT);
	zone_limits_final = true;

	free_area_init_nodes(max_zone_pfns);

	mark_nonram_nosave();
}
首先看函數尾部,分別調用兩次limit_zone_pfn對ZONE_NORMAL和ZONE_HIGHMEM頁幀進行限制,這裏需要注意的一點是因爲沒有對ZONE_DMA進行特殊處理,因此會導致ZONE_DMA佔用所有低端內存,ZONE_NORMAL爲空;
ZONE_HIGHMEM的設置用到了top_of_ram,爲RAM的最高空間,通過memblock可以查詢到,在內存節點的添加以及後面的一些運算過程中會設置物理內存的上限;
ZONE_NORMAL的設置用到了lowmem_end_addr即低端內存的末尾,其數值由下面計算過程決定:
在MMU_init中:
lowmem_end_addr = memstart_addr + total_lowmem;
其值直接依賴於total_lowmem;該計算過程進行了兩次,分別在adjust_total_lowmem之前和之後;adjust_total_lowmem中會重新計算__max_low_memory,從而可能進一步限定total_lowmem的取值;由此可見,total_lowmem(在可能被__max_low_memory覆蓋之前)與__max_low_memory兩者中的較小值決定了lowmem_end_addr的值,並且此時會將較小者賦值給total_lowmem作爲最新值;total_lowmem的初始值即爲RAM的大小;
__max_low_memory通過下面過程取得:
1. 定義和初始化
/*
 * Configured low-memory size: 0x30000000 bytes = 768 MiB.
 * NOTE(review): the CONFIG_ prefix suggests this comes from the kernel
 * .config rather than being hand-written here - confirm.
 */
#define CONFIG_LOWMEM_SIZE 0x30000000
/* Default upper bound for low memory; simply the configured size. */
#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE
/*
 * Max amount of low RAM to map in.  Initialised to MAX_LOW_MEM; it is a
 * plain (non-const) global, so later boot code may assign a new value.
 */
unsigned long __max_low_memory = MAX_LOW_MEM;
其中CONFIG_LOWMEM_SIZE爲內核.config中定義的大小;
2. 內核啓動參數bootargs中vmalloc參數的影響
/*
 * vmalloc=size requests a vmalloc area of 'size' bytes.  The request is
 * clamped to the range [16MB, VMALLOC_RESERVE_MAX].
 *
 * The low-memory limit is whatever remains below IOREMAP_TOP once
 * VMALLOC_OFFSET and the vmalloc area have been subtracted, rounded down
 * to a 16MB boundary.  A larger vmalloc area therefore shrinks low memory
 * and a smaller one enlarges it.
 *
 * @arg: text following "vmalloc=" on the kernel command line.
 *
 * Returns 0 in every case.  When no value was supplied the option is
 * ignored and __max_low_memory keeps its previous value.
 */
static int __init early_vmalloc(char *arg)
{
	unsigned long vmalloc_reserve;

	/*
	 * NOTE(review): the early-param core is expected to pass NULL when
	 * "vmalloc" appears without "=size"; memparse() must not be handed a
	 * NULL string, so bail out before parsing.
	 */
	if (!arg) {
		PRT("vmalloc: missing size argument, option ignored\n");
		return 0;
	}

	vmalloc_reserve = memparse(arg, NULL);
	PRT("vmalloc_reserve = 0x%lx", vmalloc_reserve);

	/* Enforce the lower bound on the vmalloc area. */
	if (vmalloc_reserve < SZ_16M) {
		vmalloc_reserve = SZ_16M;
		PRT("vmalloc area too small, limiting to %luMB\n",
		    vmalloc_reserve >> 20);
	}

	/* Enforce the upper bound on the vmalloc area. */
	if (vmalloc_reserve > VMALLOC_RESERVE_MAX) {
		vmalloc_reserve = VMALLOC_RESERVE_MAX;
		PRT("vmalloc area is too big, limiting to %luMB\n",
		    vmalloc_reserve >> 20);
	}

	/* Log the old limit, then derive the new one from the vmalloc size. */
	PRT("__max_low_memory = 0x%lx", __max_low_memory);
	__max_low_memory = __pa(IOREMAP_TOP) - VMALLOC_OFFSET - vmalloc_reserve;
	/* Round the low-memory limit down to a 16MB boundary. */
	__max_low_memory &= ~(SZ_16M - 1);
	PRT("__max_low_memory = 0x%lx", __max_low_memory);

	return 0;
}
early_param("vmalloc", early_vmalloc);
early_vmalloc函數根據vmalloc參數解析出vmalloc_reserve的大小,大小有一定的限制,指定最小16M,最大爲1G的內核虛擬地址空間除去一個VMALLOC_OFFSET和64M,64M應該是作爲系統低端保障內存的空間大小;
接下來直接算出__max_low_memory的大小,並進行16M對齊:
__max_low_memory = __pa(IOREMAP_TOP) - VMALLOC_OFFSET - vmalloc_reserve;
__max_low_memory &= ~(SZ_16M - 1);
3. adjust_total_lowmem進行調整,這一步是CONFIG_FSL_BOOKE架構特有的處理
取__max_low_memory和total_lowmem中較小者,然後通過map_mem_in_cams接口進行映射建立tlb條目,將可以映射的最大RAM作爲最終結果重新賦值給__max_low_memory;可以看到如果tlb條目可映射的RAM數量較小,則__max_low_memory會進一步在此被修改變小
按照上面1/2/3條逐步進行限制,如果bootargs未配置vmalloc參數則無第2條執行;
回到最開始的部分,通過__max_low_memory參數最終的值和total_lowmem,在MMU_init中確定total_lowmem的最終值,從而決定了lowmem_end_addr的大小,相對total_lowmem偏移memstart_addr,在此偏移爲0;進而決定了ZONE_DMA和ZONE_NORMAL的內存佈局