今天接着讲memory的初始化函数arm_memblock_init。上文已经完成了memory size的初始化,即把整个memory以memory type的方式添加到了memblock中;下面是memory reserve部分的初始化,其初始化流程如下:
setup_arch->arm_memblock_init->early_init_fdt_scan_reserved_mem->of_scan_flat_dt->__fdt_scan_reserved_mem->__reserved_mem_reserve_reg->early_init_dt_reserve_memory_arch->memblock_reserve->memblock_reserve_region->memblock_add_range.
首先看arm_memblock_init, 此函数主要在上面已经初始化完成的memory size上完成kernel以及外设的memory reserve工作:
/*
 * arm_memblock_init() - register the boot-time reserved regions with memblock.
 * @mdesc: machine descriptor; its optional ->reserve() hook is called below.
 *
 * In order, this reserves the kernel image, the initrd (when it passes the
 * sanity checks), whatever arm_mm_memblock_reserve() claims, the
 * platform-specific areas, the regions requested by the flattened device
 * tree, the DMA contiguous area and the MT-RAMDUMP area. It then clears
 * arm_memblock_steal_permitted and dumps the memblock layout.
 */
void __init arm_memblock_init(const struct machine_desc *mdesc)
{
/* Register the kernel text, kernel data and initrd with memblock. */
#ifdef CONFIG_XIP_KERNEL
/* XIP kernel: reserve the range from __pa(_sdata), of length _end - _sdata. */
memblock_reserve(__pa(_sdata), _end - _sdata);
#else
/*
 * Non-XIP kernel: reserve the range from __pa(_stext), of length
 * _end - _stext. Per the original annotation CONFIG_XIP_KERNEL is usually
 * not defined, so this is the branch normally taken.
 */
memblock_reserve(__pa(_stext), _end - _stext);
#endif
#ifdef CONFIG_BLK_DEV_INITRD
/* FDT scan will populate initrd_start */
/*
 * Only the virtual initrd range is known so far: derive the physical start
 * and the size from it.
 */
if (initrd_start && !phys_initrd_size) {
phys_initrd_start = __virt_to_phys(initrd_start);
phys_initrd_size = initrd_end - initrd_start;
}
/* Cleared here; set again below only if the initrd survives both checks. */
initrd_start = initrd_end = 0;
/* Drop the initrd if its range is not entirely inside a memory region. */
if (phys_initrd_size &&
!memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) {
pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n",
(u64)phys_initrd_start, phys_initrd_size);
phys_initrd_start = phys_initrd_size = 0;
}
/* Drop the initrd if its range overlaps something already reserved. */
if (phys_initrd_size &&
memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) {
pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n",
(u64)phys_initrd_start, phys_initrd_size);
phys_initrd_start = phys_initrd_size = 0;
}
/* The initrd is still valid: reserve the initial ramdisk image region. */
if (phys_initrd_size) {
memblock_reserve(phys_initrd_start, phys_initrd_size);
/* Now convert initrd to virtual addresses */
initrd_start = __phys_to_virt(phys_initrd_start);
initrd_end = initrd_start + phys_initrd_size;
}
#endif
/*
 * NOTE(review): per the original annotation this reserves the memory that
 * holds the pgd table used for address mapping; the helper is not visible
 * here, so confirm against its definition.
 */
arm_mm_memblock_reserve();
/* reserve any platform specific memblock areas */
if (mdesc->reserve)
mdesc->reserve();
/*
 * Reserve the DTB blob, the header /memreserve/ entries and the regions
 * described under the "reserved-memory" device tree node.
 */
early_init_fdt_scan_reserved_mem();
/* reserve memory for DMA contiguous allocations */
dma_contiguous_reserve(arm_dma_limit);
/* reserve memory for MT-RAMDUMP */
mrdump_rsvmem();
/* Disabled hard-coded reservations, kept as they appear in the listing. */
//memblock_reserve(0x78000000, 0x8000000);
//memblock_reserve(0x72100000, 0x1600000);
//memblock_reserve(0x44640000, 0xE00000);
/* NOTE(review): presumably forbids arm_memblock_steal() from here on - confirm. */
arm_memblock_steal_permitted = false;
memblock_dump_all();
}
early_init_fdt_scan_reserved_mem:
/**
 * early_init_fdt_scan_reserved_mem() - reserve every region the FDT asks for.
 *
 * Does nothing when initial_boot_params (the address of the flattened device
 * tree) is NULL. Otherwise it reserves, in order:
 *   1. the device tree blob itself (fdt_totalsize() bytes),
 *   2. each entry of the header /memreserve/ map, up to the first entry whose
 *      size is zero,
 *   3. the regions described under the "reserved-memory" node, through the
 *      __fdt_scan_reserved_mem() callback,
 * and finally calls fdt_init_reserved_mem() to finish the regions that the
 * scan only recorded.
 */
void __init early_init_fdt_scan_reserved_mem(void)
{
	int n;
	u64 base, size;

	/* No flattened device tree was handed over: nothing to scan. */
	if (!initial_boot_params)
		return;

	/* Reserve the dtb region */
	early_init_dt_reserve_memory_arch(__pa(initial_boot_params),
					  fdt_totalsize(initial_boot_params),
					  0);

	/* Process header /memreserve/ fields */
	for (n = 0; ; n++) {
		/*
		 * Stop on a libfdt error as well as on the terminating entry:
		 * after a failed lookup base/size may not have been written,
		 * so they must not be read.
		 */
		if (fdt_get_mem_rsv(initial_boot_params, n, &base, &size) < 0)
			break;
		if (!size)
			break;
		/* Reserve the region described by this /memreserve/ entry. */
		early_init_dt_reserve_memory_arch(base, size, 0);
	}

	/*
	 * Walk the tree; the callback picks out the "reserved-memory" node and
	 * handles each of its children.
	 */
	of_scan_flat_dt(__fdt_scan_reserved_mem, NULL);
	fdt_init_reserved_mem();
}
of_scan_flat_dt:
/**
 * of_scan_flat_dt() - walk the flattened device tree, calling @it per node.
 * @it:   callback invoked as it(node offset, node name, depth, @data); a
 *        non-zero return value stops the walk.
 * @data: opaque pointer passed unchanged to every @it invocation.
 *
 * Return: the value of the last @it call, or 0 if @it was never called.
 */
int __init of_scan_flat_dt(int (*it)(unsigned long node,
				     const char *uname, int depth,
				     void *data),
			   void *data)
{
	const void *blob = initial_boot_params;
	const char *pathp;
	int offset, rc = 0, depth = -1;

	/*
	 * Visit the nodes one after another. The walk ends when
	 * fdt_next_node() yields a negative offset, when depth drops below
	 * zero, or as soon as the callback returns non-zero (rc is what lets a
	 * callback terminate the scan early).
	 */
	for (offset = fdt_next_node(blob, -1, &depth);
	     offset >= 0 && depth >= 0 && !rc;
	     offset = fdt_next_node(blob, offset, &depth)) {
		/* Fetch the name of the current node. */
		pathp = fdt_get_name(blob, offset, NULL);
		/* A name stored as a full path is reduced to its last component. */
		if (*pathp == '/')
			pathp = kbasename(pathp);
		rc = it(offset, pathp, depth, data);
	}
	return rc;
}
__fdt_scan_reserved_mem:
/**
 * __fdt_scan_reserved_mem() - of_scan_flat_dt() callback for /reserved-memory.
 * @node:  offset of the node being visited.
 * @uname: name of the node being visited.
 * @depth: depth of the node (the callback treats 1 as a child of the root).
 * @data:  unused.
 *
 * Skips nodes until the depth-1 node named "reserved-memory" is seen, then
 * handles every deeper node that follows, and asks for the scan to stop at
 * the first node of depth < 2 after that.
 *
 * Return: 0 to continue with the next node, 1 to stop the scan.
 */
static int __init __fdt_scan_reserved_mem(unsigned long node, const char *uname,
					  int depth, void *data)
{
	/* Remembers across calls that /reserved-memory has been entered. */
	static int found;
	const char *status;
	int enabled;
	int err;

	if (!found) {
		/* Still looking for the "reserved-memory" child of the root. */
		if (depth != 1 || strcmp(uname, "reserved-memory") != 0)
			return 0;

		if (__reserved_mem_check_root(node) != 0) {
			pr_err("Reserved memory: unsupported node format, ignoring\n");
			/* break scan */
			return 1;
		}

		found = 1;
		/* scan next node */
		return 0;
	}

	/* scanning of /reserved-memory has been finished */
	if (depth < 2)
		return 1;

	/* A node with a "status" property must say "okay" or "ok". */
	status = of_get_flat_dt_prop(node, "status", NULL);
	enabled = !status || strcmp(status, "okay") == 0 ||
		  strcmp(status, "ok") == 0;
	if (!enabled)
		return 0;

	/*
	 * Static definition: the node has a "reg" property and its regions are
	 * reserved right away. Dynamic definition: there is no "reg" (-ENOENT)
	 * but there is a "size"; the node is only recorded, with base and size
	 * 0, so that fdt_init_reserved_mem() can allocate it later.
	 */
	err = __reserved_mem_reserve_reg(node, uname);
	if (err == -ENOENT && of_get_flat_dt_prop(node, "size", NULL))
		fdt_reserved_mem_save_node(node, uname, 0, 0);

	/* scan next node */
	return 0;
}
fdt_reserved_mem_save_node:
/**
 * fdt_reserved_mem_save_node() - record one reserved region for later init.
 * @node:  offset of the device tree node describing the region.
 * @uname: name of that node.
 * @base:  base address of the region (0 when not yet allocated).
 * @size:  size of the region (0 when not yet allocated).
 *
 * Appends an entry to the global reserved_mem[] array and increments
 * reserved_mem_count, which tracks how many regions have been recorded.
 * When the array is already full an error is logged and nothing is stored.
 */
void __init fdt_reserved_mem_save_node(unsigned long node, const char *uname,
				       phys_addr_t base, phys_addr_t size)
{
	struct reserved_mem *slot;

	/* Table full (the original note gives MAX_RESERVED_REGIONS = 30). */
	if (reserved_mem_count == ARRAY_SIZE(reserved_mem)) {
		pr_err("Reserved memory: not enough space all defined regions.\n");
		return;
	}

	slot = &reserved_mem[reserved_mem_count++];
	slot->fdt_node = node;
	slot->name = uname;
	slot->base = base;
	slot->size = size;
}
__reserved_mem_reserve_reg:
/**
 * __reserved_mem_reserve_reg() - reserve the regions listed in a node's "reg".
 * @node:  offset of the reserved-memory child node.
 * @uname: name of that node, used in log messages and when recording it.
 *
 * Each "reg" entry is an (address, size) pair encoded with
 * dt_root_addr_cells and dt_root_size_cells cells. Every entry is passed to
 * early_init_dt_reserve_memory_arch(); an entry with size 0 is not reserved
 * and is logged through the "failed" message. Only the first entry is
 * recorded with fdt_reserved_mem_save_node().
 *
 * Return: 0 on success, -ENOENT when the node has no "reg" property, -EINVAL
 * when the property length is not a multiple of the entry size.
 */
static int __init __reserved_mem_reserve_reg(unsigned long node,
					     const char *uname)
{
	int entry_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32);
	const __be32 *reg;
	phys_addr_t base, size;
	int remaining;
	int nomap;
	int recorded = 0;

	reg = of_get_flat_dt_prop(node, "reg", &remaining);
	if (!reg)
		return -ENOENT;

	if (remaining && remaining % entry_len != 0) {
		pr_err("Reserved memory: invalid reg property in '%s', skipping node.\n",
		       uname);
		return -EINVAL;
	}

	nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;

	/* A node may list several regions, hence more than one entry. */
	for (; remaining >= entry_len; remaining -= entry_len) {
		base = dt_mem_next_cell(dt_root_addr_cells, &reg);
		size = dt_mem_next_cell(dt_root_size_cells, &reg);

		if (size &&
		    early_init_dt_reserve_memory_arch(base, size, nomap) == 0)
			pr_info("Reserved memory: reserved region for node '%s': base %pa, size %ld MiB\n",
				uname, &base, (unsigned long)size / SZ_1M);
		else
			pr_info("Reserved memory: failed to reserve memory for node '%s': base %pa, size %ld MiB\n",
				uname, &base, (unsigned long)size / SZ_1M);

		/* Only the first region of the node is remembered. */
		if (!recorded) {
			fdt_reserved_mem_save_node(node, uname, base, size);
			recorded = 1;
		}
	}

	return 0;
}
early_init_dt_reserve_memory_arch:
/**
 * early_init_dt_reserve_memory_arch() - default (weak) region reservation.
 * @base:  base address of the region.
 * @size:  size of the region.
 * @nomap: true when the node carries the "no-map" property.
 *
 * A no-map region is taken out with memblock_remove(); any other region is
 * recorded with memblock_reserve().
 *
 * Return: the return value of whichever memblock call was made.
 */
int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
					phys_addr_t size, bool nomap)
{
	return nomap ? memblock_remove(base, size)
		     : memblock_reserve(base, size);
}
device tree中的reserved-memory节点及其子节点静态或者动态定义了若干的reserved memory region,静态定义的memory region起始地址和size都是确定的,因此可以立刻调用memblock的模块进行内存区域的预留,但是对于动态定义的memory region,__fdt_scan_reserved_mem只是将信息保存在了reserved_mem全局变量中,并没有进行实际的内存预留动作,具体的操作在fdt_init_reserved_mem函数中,代码如下:
/**
 * fdt_init_reserved_mem() - finish every region recorded in reserved_mem[].
 *
 * For each recorded region: store its phandle if the node has one, allocate
 * the region when it was only recorded (size still 0), and, unless that
 * allocation failed, run the matching init hook through
 * __reserved_mem_init_node().
 */
void __init fdt_init_reserved_mem(void)
{
	int i;

	/* check for overlapping reserved regions */
	__rmem_check_for_overlap();

	for (i = 0; i < reserved_mem_count; i++) {
		struct reserved_mem *rmem = &reserved_mem[i];
		unsigned long node = rmem->fdt_node;
		const __be32 *phandle_prop;
		int len;

		/* Prefer "phandle", fall back to "linux,phandle". */
		phandle_prop = of_get_flat_dt_prop(node, "phandle", &len);
		if (!phandle_prop)
			phandle_prop = of_get_flat_dt_prop(node, "linux,phandle", &len);
		if (phandle_prop)
			rmem->phandle = of_read_number(phandle_prop, len/4);

		/*
		 * size == 0 marks a dynamically defined region whose base is
		 * not known yet: allocate it first, and only initialise the
		 * node when that succeeded. Regions that already have a size
		 * are initialised directly.
		 */
		if (rmem->size != 0 ||
		    __reserved_mem_alloc_size(node, rmem->name,
					      &rmem->base, &rmem->size) == 0)
			__reserved_mem_init_node(rmem);
	}
}
/*
 * RESERVEDMEM_OF_DECLARE() - declare an init function for reserved-memory
 * nodes with compatible string @compat. Expands to _OF_DECLARE() with the
 * "reservedmem" table and the reservedmem_of_init_fn function type.
 */
#define RESERVEDMEM_OF_DECLARE(name, compat, init) \
_OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn)
/*
 * _OF_DECLARE() - define a static const struct of_device_id named
 * __of_table_<name>, placed in section __<table>_of_table, whose .compatible
 * is @compat and whose .data is @fn.
 * Both arms of the conditional yield @fn; the comparison against
 * (fn_type)NULL presumably exists only so the compiler checks @fn against
 * @fn_type - confirm.
 */
#define _OF_DECLARE(table, name, compat, fn, fn_type) \
static const struct of_device_id __of_table_##name \
__used __section(__##table##_of_table) \
= { .compatible = compat, \
.data = (fn == (fn_type)NULL) ? fn : fn }
__reserved_mem_init_node:
/**
 * __reserved_mem_init_node() - run the init hook matching a reserved region.
 * @rmem: the recorded region to initialise.
 *
 * Walks __reservedmem_of_table up to __rmem_of_table_sentinel. For each entry
 * whose compatible string matches the region's device tree node, the entry's
 * init function is called; the first one that returns 0 ends the search.
 *
 * Return: 0 when an init function accepted the region, -ENOENT otherwise.
 */
static int __init __reserved_mem_init_node(struct reserved_mem *rmem)
{
	extern const struct of_device_id __reservedmem_of_table[];
	const struct of_device_id *entry = __reservedmem_of_table;

	while (entry < &__rmem_of_table_sentinel) {
		reservedmem_of_init_fn setup = entry->data;
		const char *compat = entry->compatible;

		/* The init hook only runs for entries whose compatible matches. */
		if (of_flat_dt_is_compatible(rmem->fdt_node, compat) &&
		    setup(rmem) == 0) {
			pr_info("Reserved memory: initialized node %s, compatible id %s\n",
				rmem->name, compat);
			return 0;
		}
		entry++;
	}

	return -ENOENT;
}
最后来看一下device tree的实例:
/*
 * Example "reserved-memory" node. The first three children carry a "reg"
 * property (static definition: base and size are fixed). The last two carry
 * "size" without "reg" (dynamic definition: the base is chosen at boot) and
 * are marked "no-map".
 */
217 reserved_memory: reserved-memory {
218 #address-cells = <2>;
219 #size-cells = <2>;
220 ranges;
221 /*TODO: add reserved memory node here*/
/* Static regions: reg = <base-hi base-lo size-hi size-lo> (2 cells each). */
222 pstore-reserved-memory@44410000 {
223 compatible = "mediatek,pstore";
224 reg = <0 0x44410000 0 0xe0000>;
225 };
226 ram_console-reserved-memory@44400000 {
227 compatible = "mediatek,ram_console";
228 reg = <0 0x44400000 0 0x10000>;
229 };
230 minirdump-reserved-memory@444f0000{
231 compatible = "mediatek,minirdump";
232 reg = <0 0x444f0000 0 0x10000>;
233 };
/* Dynamic regions: only size (plus optional alignment / alloc-ranges). */
234 consys-reserve-memory {
235 compatible = "mediatek,consys-reserve-memory";
236 #address-cells = <2>;
237 #size-cells = <2>;
238 no-map;
239 size = <0 0x200000>;
240 alignment = <0 0x200000>;
241 alloc-ranges = <0 0x40000000 0 0x38000000>;
242 };
243 //add XWWYWHSJB-1791 by wurui.zhang 2018-05-29 start
244 soter-shared-mem {
245 compatible = "microtrust,shared_mem";
246 no-map;
247 alloc-ranges = <0 0x40000000 0 0x38000000>;
248 size = <0 0x500000>;
249 };
250 //add XWWYWHSJB-1791 by wurui.zhang 2018-05-29 end
251 };
总结:
物理内存布局是归于memblock模块进行管理的,该模块定义了struct memblock memblock这样的一个全局变量保存了memory type和reserved type的memory region list。而通过这两个memory region的数组,我们就知道了操作系统需要管理的所有系统内存的布局情况。
reserve实际上就是在总的memory size上面挖坑。
1. 往memblock里面添加type为reserve类型的memory操作使用memblock_reserve()函数.
2. 往memblock里面添加type为memory类型的memory时候使用memblock_add().
3. 从memblock里面移除一块type为memory的memory使用memblock_remove().
4. 从memblock里面移除一块type为reserve的memory使用memblock_free().
注:本文部分内容参考了wowotech的《内存初始化代码分析(二):内存布局》。