日期 | 內核版本 | 架構 | 作者 | GitHub | CSDN |
---|---|---|---|---|---|
2017-07-17 | Linux-4.12 | X86 | lwhuq | LinuxMemoryStudy | Linux內存管理 |
1 Introduction
在Linux內核早期啓動階段,在Linux的內存管理模塊還沒有初始化完成之前,內核也需要提供簡化的內存管理模塊來滿足內存分配請求。早期的內核中負責初始化階段的內存分配器稱爲引導內存分配器(bootmem分配器)。bootmem分配器基於最先適配(first-fit)分配器的原理(這也是很多系統的內存分配所使用的原理),使用一個位圖來管理頁。最新的內核過渡到使用memblock,詳見patch。
Memblock和bootmem這兩種機制對外提供的API是一致的,因此對用戶是透明的。內核中可以通過編譯選項CONFIG_NO_BOOTMEM來選擇使用哪一種機制,定義在mm/Makefile#L46
# Select the boot-time allocator object: nobootmem.o when CONFIG_NO_BOOTMEM
# is defined, bootmem.o otherwise.
ifdef CONFIG_NO_BOOTMEM
obj-y += nobootmem.o
else
obj-y += bootmem.o
endif
2 Data structure
Memblock的所有數據結構定義在include/linux/memblock.h。
第一個數據結構的名字是memblock,定義在include/linux/memblock.h#L48
/*
 * Top-level memblock descriptor: the allocation direction, the current
 * allocation limit, and one memblock_type list per kind of region.
 */
struct memblock {
bool bottom_up; /* is bottom up direction? if true, memory is allocated from low addresses upward */
phys_addr_t current_limit; /* limit applied to memblock allocations; NOTE(review): it is initialised to MEMBLOCK_ALLOC_ANYWHERE, so it reads as an upper address bound rather than a size - confirm */
/* three kinds of region lists: memory, reserved, physical */
struct memblock_type memory;
struct memblock_type reserved;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
struct memblock_type physmem; /* only present when CONFIG_HAVE_MEMBLOCK_PHYS_MAP is defined */
#endif
};
第二個數據結構是memblock_type,定義在include/linux/memblock.h#L40
struct memblock_type {
unsigned long cnt; /* number of regions 內存區域的數目*/
unsigned long max; /* size of the allocated array 已經分配的內存區域大小*/
phys_addr_t total_size; /* size of all regions 所有內存區域的大小*/
struct memblock_region *regions; /* 指針指向memblock_region結構體 */
char *name; /* 名字 */
};
memblock_region結構用於描述memory region,定義在include/linux/memblock.h#L31
/* Describes one memory region: start address, size, flags and (optionally) node ID. */
struct memblock_region {
phys_addr_t base; /* start address of the region */
phys_addr_t size; /* size of the region */
unsigned long flags; /* region flags (see the MEMBLOCK_* enum) */
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
int nid; /* node ID; only present when CONFIG_HAVE_MEMBLOCK_NODE_MAP is defined */
#endif
};
memblock_region記錄了當前memory region的起始地址,大小,標誌和Node ID。標誌的定義在include/linux/memblock.h#L24
/*
 * Definition of memblock flags.
 * Each non-zero flag is a distinct bit (0x1, 0x2, 0x4); MEMBLOCK_NONE is 0.
 */
enum {
MEMBLOCK_NONE = 0x0, /* No special request */
MEMBLOCK_HOTPLUG = 0x1, /* hotpluggable region */
MEMBLOCK_MIRROR = 0x2, /* mirrored region */
MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */
};
3 Memblock 初始化
Memblock結構的實例是一個同名全局靜態變量,定義在mm/memblock.c#L34
/*
 * Statically allocated region arrays that back the initial memblock
 * instance: one for "memory", one for "reserved" and, when
 * CONFIG_HAVE_MEMBLOCK_PHYS_MAP is defined, one for "physmem".
 */
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
#endif
/*
 * The global memblock instance. Every region list points at its static
 * array, starts with cnt == 1 (a single empty dummy entry) and has max set
 * to the element count of that array.
 */
struct memblock memblock __initdata_memblock = {
.memory.regions = memblock_memory_init_regions,
.memory.cnt = 1, /* empty dummy entry */
.memory.max = INIT_MEMBLOCK_REGIONS,
.memory.name = "memory",
.reserved.regions = memblock_reserved_init_regions,
.reserved.cnt = 1, /* empty dummy entry */
.reserved.max = INIT_MEMBLOCK_REGIONS,
.reserved.name = "reserved",
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
.physmem.regions = memblock_physmem_init_regions,
.physmem.cnt = 1, /* empty dummy entry */
.physmem.max = INIT_PHYSMEM_REGIONS,
.physmem.name = "physmem",
#endif
.bottom_up = false, /* bottom-up allocation is off by default */
.current_limit = MEMBLOCK_ALLOC_ANYWHERE, /* initial limit is the ANYWHERE constant */
};
- 宏__initdata_memblock指定了結構儲存位置,如果定義了CONFIG_ARCH_DISCARD_MEMBLOCK,則存放在__meminitdata
- 每種memory type的cnt字段都初始化爲1
- 每種memory type的regions都指向全局靜態數組。數組單元個數,memory和reserved初始化爲INIT_MEMBLOCK_REGIONS,physical memory初始化爲INIT_PHYSMEM_REGIONS。因此max字段也初始化同樣的值
/* Element counts of the static region arrays (memory/reserved, and physmem). */
#define INIT_MEMBLOCK_REGIONS 128
#define INIT_PHYSMEM_REGIONS 4
- bottom_up被初始化爲false,說明內存分配是從高到低
- current_limit被初始化爲MEMBLOCK_ALLOC_ANYWHERE,可訪問到最高地址空間。
/* A phys_addr_t value with all bits set. */
#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0)
4 Memblock APIs
4.1 Add
在bootmem.h中的相關APIs
4.1.1 memblock_add_range
/**
* memblock_add_range - add new memblock region
* @type: memblock type to add new region into
* @base: base address of the new region
* @size: size of the new region
* @nid: nid of the new region
* @flags: flags of the new region
*
* Add new memblock region [@base,@base+@size) into @type. The new region
* is allowed to overlap with existing ones - overlaps don't affect already
* existing regions. @type is guaranteed to be minimal (all neighbouring
* compatible regions are merged) after the addition.
*
* The body runs in two passes over the existing regions: a counting pass
* (insert == false) followed, after growing the array if needed, by an
* inserting pass (insert == true).
*
* RETURNS:
* 0 on success, -errno on failure.
*/
int __init_memblock memblock_add_range(struct memblock_type *type,
phys_addr_t base, phys_addr_t size,
int nid, unsigned long flags)
{
bool insert = false;
/* keep the original base: @base is advanced while scanning and reset per pass */
phys_addr_t obase = base;
/* @size is passed by pointer, so memblock_cap_size() may adjust it */
phys_addr_t end = base + memblock_cap_size(base, &size);
/*
 * NOTE(review): idx is never assigned in this function body; presumably
 * for_each_memblock_type() initialises and advances it - confirm against
 * the macro definition.
 */
int idx, nr_new;
struct memblock_region *rgn;
/* nothing to add */
if (!size)
return 0;
/* special case for empty array */
if (type->regions[0].size == 0) {
WARN_ON(type->cnt != 1 || type->total_size);
type->regions[0].base = base;
type->regions[0].size = size;
type->regions[0].flags = flags;
memblock_set_region_node(&type->regions[0], nid);
type->total_size = size;
return 0;
}
repeat:
/*
* The following is executed twice. Once with %false @insert and
* then with %true. The first counts the number of regions needed
* to accommodate the new area. The second actually inserts them.
*/
base = obase;
nr_new = 0;
for_each_memblock_type(type, rgn) {
phys_addr_t rbase = rgn->base;
phys_addr_t rend = rbase + rgn->size;
/* existing region starts at or beyond the end of the new range: stop */
if (rbase >= end)
break;
/* existing region ends at or below the not-yet-handled part: skip it */
if (rend <= base)
continue;
/*
* @rgn overlaps. If it separates the lower part of new
* area, insert that portion.
*/
if (rbase > base) {
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
WARN_ON(nid != memblock_get_region_node(rgn));
#endif
WARN_ON(flags != rgn->flags);
/* counted in both passes; only the second pass inserts */
nr_new++;
if (insert)
memblock_insert_region(type, idx++, base,
rbase - base, nid,
flags);
}
/* area below @rend is dealt with, forget about it */
base = min(rend, end);
}
/* insert the remaining portion */
if (base < end) {
nr_new++;
if (insert)
memblock_insert_region(type, idx, base, end - base,
nid, flags);
}
/* the new range is already fully covered by existing regions */
if (!nr_new)
return 0;
/*
* If this was the first round, resize array and repeat for actual
* insertions; otherwise, merge and return.
*/
if (!insert) {
/* grow the region array until cnt + nr_new fits within max */
while (type->cnt + nr_new > type->max)
if (memblock_double_array(type, obase, size) < 0)
return -ENOMEM;
insert = true;
goto repeat;
} else {
memblock_merge_regions(type);
return 0;
}
}
- 第一次循環檢查是否有region的overlap。並且檢查memory type存放的memory region實例個數type->max是否足夠容納新增的region。不夠的話就調用memblock_double_array擴容。如果有需要添加的region就設置insert = true。最後goto到repeat執行第二次循環
- 第二次循環中,執行insert == true代碼塊,調用memblock_insert_region插入region,最後調用memblock_merge_regions合併相鄰region。
- idx沒有在函數體內顯式初始化:在4.12內核中,for_each_memblock_type宏內部隱式使用調用者的局部變量idx,循環開始時執行idx = 0,每次迭代執行idx++,因此循環內以及循環結束後idx的值都是確定的。
4.2 Free and remove
4.3 Allocate
- memory allocate就是把內存範圍添加到memory reserved region
5 memblock初始化
X86_64架構內核從E820和EFI memmap得到boot內存信息,隨後根據boot內存信息建立memblock結構。具體實現在setup_arch函數,定義在arch/x86/kernel/setup.c#L848
/*
 * x86 architecture setup.
 * NOTE(review): this appears to be an abridged excerpt showing only the
 * memblock-related steps (cmdline_p is unused here) - compare with the full
 * function in arch/x86/kernel/setup.c.
 */
void __init setup_arch(char **cmdline_p)
{
/* reserve the range from _text up to __bss_stop (presumably the kernel image) */
memblock_reserve(__pa_symbol(_text),
(unsigned long)__bss_stop - (unsigned long)_text);
#ifdef CONFIG_EFI
if (efi_enabled(EFI_BOOT))
efi_memblock_x86_reserve_range();
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Memory used by the kernel cannot be hot-removed because Linux
* cannot migrate the kernel pages. When memory hotplug is
* enabled, we should prevent memblock from allocating memory
* for the kernel.
*
* ACPI SRAT records all hotpluggable memory ranges. But before
* SRAT is parsed, we don't know about it.
*
* The kernel image is loaded into memory at very early time. We
* cannot prevent this anyway. So on NUMA system, we set any
* node the kernel resides in as un-hotpluggable.
*
* Since on modern servers, one node could have double-digit
* gigabytes memory, we can assume the memory around the kernel
* image is also un-hotpluggable. So before SRAT is parsed, just
* allocate memory near the kernel image to try the best to keep
* the kernel away from hotpluggable memory.
*/
if (movable_node_is_enabled())
memblock_set_bottom_up(true);
#endif
/* after early param, so could get panic from serial */
memblock_x86_reserve_range_setup_data();
/*
* Need to conclude brk, before e820__memblock_setup()
* it could use memblock_find_in_range, could overlap with
* brk area.
*/
reserve_brk();
cleanup_highmap();
/* set the memblock limit to ISA_END_ADDRESS before the e820 setup below */
memblock_set_current_limit(ISA_END_ADDRESS);
/* presumably populates memblock from the E820 map - confirm */
e820__memblock_setup();
}
- 最後的e820__memblock_setup()真正完成memblock的添加初始化工作。在此之前的函數主要是調用memblock_reserve把相應的內存範圍登記到reserved區域