memory block - Linux內存管理(5)


日期 內核版本 架構 作者 GitHub CSDN
2017-07-17 Linux-4.12 X86 lwhuq LinuxMemoryStudy Linux內存管理

1 Introduction

  在Linux內核早期啓動階段,在Linux的內存管理模塊還沒有初始化完成之前,內核也需要提供簡化的內存管理模塊來滿足內存分配請求。早期的內核中負責初始化階段的內存分配器稱爲引導內存分配器(bootmem分配器)。bootmem分配器基於最先適配(first-fit)分配器的原理(這也是很多系統的內存分配所使用的原理),使用一個位圖來管理頁。最新的內核過渡到使用memblock,詳見patch

  Memblock和bootmem這兩種機制對外提供的API是一致的,因此對用戶是透明的。內核中可以通過編譯選項CONFIG_NO_BOOTMEM來選擇使用哪一種機制,定義在mm/Makefile#L46

# Build exactly one boot-time allocator object: nobootmem.o when
# CONFIG_NO_BOOTMEM is defined, bootmem.o otherwise.
ifdef CONFIG_NO_BOOTMEM
	obj-y		+= nobootmem.o
else
	obj-y		+= bootmem.o
endif

2 Data structure

  Memblock的所有數據結構定義在include/linux/memblock.h。

  第一個數據結構的名字是memblock,定義在include/linux/memblock.h#L48

/*
 * Top-level memblock state: the allocation direction, an allocation limit,
 * and one memblock_type for each class of memory being tracked.
 */
struct memblock {
	bool bottom_up;  /* is bottom up direction? if true, allocate from the bottom upwards */
	/*
	 * Limit applied to memblock allocations.
	 * NOTE(review): this is a phys_addr_t, so it is presumably an upper
	 * address bound rather than a size - confirm against the allocator.
	 */
	phys_addr_t current_limit;
	/* three classes of memory: memory, reserved, physical */
	struct memblock_type memory;
	struct memblock_type reserved;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
	/* only present when CONFIG_HAVE_MEMBLOCK_PHYS_MAP is defined */
	struct memblock_type physmem;
#endif
};
  第二個數據結構是memblock_type,定義在include/linux/memblock.h#L40
/*
 * One class of memory (for example "memory" or "reserved"): a region array
 * together with its bookkeeping counters.
 */
struct memblock_type {
	unsigned long cnt;	/* number of regions */
	unsigned long max;	/* size of the allocated array */
	phys_addr_t total_size;	/* size of all regions */
	struct memblock_region *regions; /* points at the array of memblock_region entries */
	char *name; /* name of this type, e.g. "memory" or "reserved" */
};

  memblock_region結構用於描述memory region,定義在include/linux/memblock.h#L31

/* Describes a single memory region covering [base, base + size). */
struct memblock_region {
	phys_addr_t base;	/* start address of the region */
	phys_addr_t size;	/* length of the region */
	unsigned long flags;	/* MEMBLOCK_* flag value for this region */
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
	int nid;	/* node id; only present with CONFIG_HAVE_MEMBLOCK_NODE_MAP */
#endif
};
  memblock_region記錄了當前memory region的起始地址,大小,標誌和Node ID。標誌的定義在include/linux/memblock.h#L24

/*
 * Flag values stored in a memblock region's flags field.  Every non-zero
 * flag occupies its own bit.
 */
enum {
	MEMBLOCK_NONE    = 0,		/* No special request */
	MEMBLOCK_HOTPLUG = 1 << 0,	/* hotpluggable region */
	MEMBLOCK_MIRROR  = 1 << 1,	/* mirrored region */
	MEMBLOCK_NOMAP   = 1 << 2,	/* don't add to kernel direct mapping */
};


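  總結來說,整個memory block的data structure佈局如下:全局的struct memblock包含memory、reserved(以及可選的physmem)三個memblock_type,每個memblock_type通過regions指針指向一個memblock_region數組,數組中每一項記錄一段內存區域的base、size、flags和nid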

3 Memblock 初始化

Memblock結構的實例是一個同名的全局變量(其regions最初指向的數組纔是靜態變量),定義在mm/memblock.c#L34

/*
 * Statically allocated region arrays.  The initializer of the global
 * memblock instance points the .regions member of its memory, reserved and
 * (when CONFIG_HAVE_MEMBLOCK_PHYS_MAP is defined) physmem types at these.
 */
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
#endif

/*
 * The single global memblock instance.  Each memory type starts out backed
 * by its static region array, with cnt == 1 for the empty dummy entry and
 * max equal to the capacity of that array.  Allocation direction defaults
 * to top-down (bottom_up == false) with the limit set to
 * MEMBLOCK_ALLOC_ANYWHERE.
 */
struct memblock memblock __initdata_memblock = {
	.bottom_up	= false,
	.current_limit	= MEMBLOCK_ALLOC_ANYWHERE,

	.memory = {
		.name		= "memory",
		.regions	= memblock_memory_init_regions,
		.cnt		= 1,	/* empty dummy entry */
		.max		= INIT_MEMBLOCK_REGIONS,
	},

	.reserved = {
		.name		= "reserved",
		.regions	= memblock_reserved_init_regions,
		.cnt		= 1,	/* empty dummy entry */
		.max		= INIT_MEMBLOCK_REGIONS,
	},

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
	.physmem = {
		.name		= "physmem",
		.regions	= memblock_physmem_init_regions,
		.cnt		= 1,	/* empty dummy entry */
		.max		= INIT_PHYSMEM_REGIONS,
	},
#endif
};

  • __initdata_memblock指定了結構儲存位置,如果定義了CONFIG_ARCH_DISCARD_MEMBLOCK,則存放在__meminitdata
  • 每種memory type的cnt字段都初始化爲1(對應一個空的佔位region)
  • 每種memory type的regions都指向全局靜態數組。數組單元個數,memory和reserved初始化爲INIT_MEMBLOCK_REGIONS,physical memory初始化爲INIT_PHYSMEM_REGIONS。因此max字段也初始化同樣的值
/*
 * Element counts of the static region arrays; the same values are used as
 * the initial .max of the corresponding memblock types.
 */
#define INIT_MEMBLOCK_REGIONS	128
#define INIT_PHYSMEM_REGIONS	4
  • bottom_up被初始化爲false,說明內存分配是從高地址到低地址
  • current_limit被初始化爲MEMBLOCK_ALLOC_ANYWHERE,即phys_addr_t所能表示的最大值,表示分配時不限制地址上限。
/* All-ones phys_addr_t value; the initial value of memblock.current_limit. */
#define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)

4 Memblock APIs  

4.1 Add


  在bootmem.h中的相關APIs


4.1.1 memblock_add_range

定義在mm/memblock.c#L496

/**
 * memblock_add_range - add new memblock region
 * @type: memblock type to add new region into
 * @base: base address of the new region
 * @size: size of the new region
 * @nid: nid of the new region
 * @flags: flags of the new region
 *
 * Add new memblock region [@base,@base+@size) into @type.  The new region
 * is allowed to overlap with existing ones - overlaps don't affect already
 * existing regions.  @type is guaranteed to be minimal (all neighbouring
 * compatible regions are merged) after the addition.
 *
 * The work is done in two passes over the region array: a counting pass
 * that determines how many new entries are needed (growing the array if
 * required), followed by an inserting pass.
 *
 * RETURNS:
 * 0 on success, -errno on failure (-ENOMEM when the region array cannot be
 * grown).
 */
int __init_memblock memblock_add_range(struct memblock_type *type,
				phys_addr_t base, phys_addr_t size,
				int nid, unsigned long flags)
{
	bool insert = false;
	phys_addr_t obase = base;
	/*
	 * @size is passed by pointer, so memblock_cap_size() may adjust it
	 * before it is used below.
	 * NOTE(review): presumably it clamps @size so that @base + @size
	 * does not overflow - confirm against the helper.
	 */
	phys_addr_t end = base + memblock_cap_size(base, &size);
	/*
	 * NOTE(review): idx is never assigned directly in this function, yet
	 * it is used as the insertion index.  It is presumably initialised
	 * and advanced by the for_each_memblock_type() macro - confirm
	 * against the macro definition.
	 */
	int idx, nr_new;
	struct memblock_region *rgn;

	if (!size)
		return 0;

	/* special case for empty array */
	if (type->regions[0].size == 0) {
		/* an empty array is expected to hold only the dummy entry */
		WARN_ON(type->cnt != 1 || type->total_size);
		type->regions[0].base = base;
		type->regions[0].size = size;
		type->regions[0].flags = flags;
		memblock_set_region_node(&type->regions[0], nid);
		type->total_size = size;
		return 0;
	}
repeat:
	/*
	 * The following is executed twice.  Once with %false @insert and
	 * then with %true.  The first counts the number of regions needed
	 * to accommodate the new area.  The second actually inserts them.
	 */
	base = obase;
	nr_new = 0;

	for_each_memblock_type(type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		/*
		 * @rgn starts at or beyond the end of the new area: stop.
		 * NOTE(review): stopping here assumes the regions are
		 * sorted by base address - confirm.
		 */
		if (rbase >= end)
			break;
		/* @rgn ends at or before the part still to be handled */
		if (rend <= base)
			continue;
		/*
		 * @rgn overlaps.  If it separates the lower part of new
		 * area, insert that portion.
		 */
		if (rbase > base) {
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
			WARN_ON(nid != memblock_get_region_node(rgn));
#endif
			WARN_ON(flags != rgn->flags);
			nr_new++;
			if (insert)
				memblock_insert_region(type, idx++, base,
						       rbase - base, nid,
						       flags);
		}
		/* area below @rend is dealt with, forget about it */
		base = min(rend, end);
	}

	/* insert the remaining portion */
	if (base < end) {
		nr_new++;
		if (insert)
			memblock_insert_region(type, idx, base, end - base,
					       nid, flags);
	}

	/* nothing new to add: the area is already fully covered */
	if (!nr_new)
		return 0;

	/*
	 * If this was the first round, resize array and repeat for actual
	 * insertions; otherwise, merge and return.
	 */
	if (!insert) {
		/* keep growing until cnt + nr_new entries fit into max */
		while (type->cnt + nr_new > type->max)
			if (memblock_double_array(type, obase, size) < 0)
				return -ENOMEM;
		insert = true;
		goto repeat;
	} else {
		memblock_merge_regions(type);
		return 0;
	}
}

  • 第一次循環(insert == false)只統計與已有region的overlap情況,計算出需要新增的region個數nr_new,並不真正插入。如果nr_new爲0則直接返回;否則檢查memory type的regions數組容量type->max是否足夠容納type->cnt + nr_new個region,不夠的話就調用memblock_double_array擴容,然後設置insert = true,goto到repeat執行第二次循環
  • 第二次循環中,執行insert == true代碼塊,調用memblock_insert_region插入region,最後調用memblock_merge_regions合併相鄰region。
  • idx在函數中沒有顯式初始化。C的局部變量沒有默認值,這裏idx是由for_each_memblock_type宏負責置0並遞增的(4.12版本的這個宏隱式使用了名爲idx的變量)

4.2 Free and remove


4.3 Allocate


  • memory allocate就是把內存範圍添加到memory reserved region

5 memblock初始化

  X86_64架構的內核從E820和EFI memmap得到boot內存信息,隨後根據boot內存信息建立memory block結構。具體實現在setup_arch函數,定義在arch/x86/kernel/setup.c#L848

/*
 * x86 architecture setup, reduced to its memblock-related steps: ranges
 * that are already in use are reserved first, the allocation direction and
 * limit are configured, and e820__memblock_setup() runs last.
 *
 * NOTE(review): this looks like an abridged listing - @cmdline_p is not
 * used in the lines shown.
 */
void __init setup_arch(char **cmdline_p)
{
	/*
	 * Reserve the range that starts at the physical address of _text
	 * and is (__bss_stop - _text) bytes long.
	 */
	memblock_reserve(__pa_symbol(_text),
			 (unsigned long)__bss_stop - (unsigned long)_text);


#ifdef CONFIG_EFI
	if (efi_enabled(EFI_BOOT))
		efi_memblock_x86_reserve_range();
#endif


#ifdef CONFIG_MEMORY_HOTPLUG
	/*
	 * Memory used by the kernel cannot be hot-removed because Linux
	 * cannot migrate the kernel pages. When memory hotplug is
	 * enabled, we should prevent memblock from allocating memory
	 * for the kernel.
	 *
	 * ACPI SRAT records all hotpluggable memory ranges. But before
	 * SRAT is parsed, we don't know about it.
	 *
	 * The kernel image is loaded into memory at very early time. We
	 * cannot prevent this anyway. So on NUMA system, we set any
	 * node the kernel resides in as un-hotpluggable.
	 *
	 * Since on modern servers, one node could have double-digit
	 * gigabytes memory, we can assume the memory around the kernel
	 * image is also un-hotpluggable. So before SRAT is parsed, just
	 * allocate memory near the kernel image to try the best to keep
	 * the kernel away from hotpluggable memory.
	 */
	if (movable_node_is_enabled())
		memblock_set_bottom_up(true);
#endif

	/* after early param, so could get panic from serial */
	memblock_x86_reserve_range_setup_data();

	/*
	 * Need to conclude brk, before e820__memblock_setup()
	 *  it could use memblock_find_in_range, could overlap with
	 *  brk area.
	 */
	reserve_brk();

	cleanup_highmap();

	/* set the memblock limit to ISA_END_ADDRESS before the e820 setup */
	memblock_set_current_limit(ISA_END_ADDRESS);
	e820__memblock_setup();

}

  • 最後的e820__memblock_setup()真正完成memory block的添加初始化工作。在此之前的memblock_reserve等調用只是把已被佔用的內存範圍登記到reserved類型中

6 Reference


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章