linux內核模塊動態加載過程

閱讀本文需要先了解 ELF文件格式 的相關知識,以下引用的kernel源代碼,都是基於linux kernel源代碼版本:3.4。

linux內核模塊分兩種形態,一是靜態編譯進內核的模塊,二是用insmod命令動態加載的模塊,也就是後綴名爲KO的文件。這裏主要討論linux內核動態加載模塊的過程,也就是KO文件被動態加載進內核,並運行的過程。


後綴爲KO的文件其實是一種ELF格式文件,很類似於ELF目標文件(.o文件),但是又與ELF目標文件有一點小區別。使用readelf工具可以看到,KO文件裏有一個叫.gnu.linkonce.this_module的段,而普通目標文件是沒有這個段的。這個段的內容其實是一個struct module結構體(段的地址就等於module結構體的首地址),記錄了KO模塊的一些信息,這個結構體在linux kernel源代碼裏也有定義(include/linux/module.h),因爲內核在加載模塊時要用到這個結構體。


當linux順利啓動,進入shell的時候,就可以輸入insmod命令,加載我們自己的內核模塊啦。insmod命令封裝了一個叫 sys_init_module 的系統調用,sys_init_module源碼如下:

/*
 * init_module system call: load the module image found at user address
 * @umod (@len bytes) with the argument string @uargs, then run its init
 * routine.
 *
 * Returns 0 on success, -EPERM when the caller lacks CAP_SYS_MODULE or
 * module loading has been disabled, the error produced by load_module(),
 * or the negative value returned by the module's init routine.
 */
SYSCALL_DEFINE3(init_module, void __user *, umod,
		unsigned long, len, const char __user *, uargs)
{
	struct module *mod;
	int ret = 0;

	/* Must have permission */
	if (!capable(CAP_SYS_MODULE) || modules_disabled)
		return -EPERM;

	/* Do all the hard work */
	mod = load_module(umod, len, uargs);
	if (IS_ERR(mod))
		return PTR_ERR(mod);

	/* Tell everyone on the notifier chain that the module is coming. */
	blocking_notifier_call_chain(&module_notify_list,
			MODULE_STATE_COMING, mod);

	/* Set RO and NX regions for core */
	set_section_ro_nx(mod->module_core,
				mod->core_text_size,
				mod->core_ro_size,
				mod->core_size);

	/* Set RO and NX regions for init */
	set_section_ro_nx(mod->module_init,
				mod->init_text_size,
				mod->init_ro_size,
				mod->init_size);

	do_mod_ctors(mod);
	/* Start the module (the printk is a debug trace of the entry point) */
    printk(KERN_ERR "mod->init = %p\n", mod->init);
	if (mod->init != NULL)
		ret = do_one_initcall(mod->init);
	if (ret < 0) {
		/*
		 * Init routine failed: abort.  Try to protect us from
		 * buggy refcounters.  The module is marked GOING, the
		 * notifier chain is told, and the module is freed before
		 * the init routine's error is returned to the caller.
		 */
		mod->state = MODULE_STATE_GOING;
		synchronize_sched();
		module_put(mod);
		blocking_notifier_call_chain(&module_notify_list,
					     MODULE_STATE_GOING, mod);
		free_module(mod);
		wake_up(&module_wq);
		return ret;
	}
	if (ret > 0) {
		/* A positive return is only warned about; loading continues. */
		printk(KERN_WARNING
"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
"%s: loading module anyway...\n",
		       __func__, mod->name, ret,
		       __func__);
		dump_stack();
	}

	/* Now it's a first class citizen!  Wake up anyone waiting for it. */
	mod->state = MODULE_STATE_LIVE;
	wake_up(&module_wq);
	blocking_notifier_call_chain(&module_notify_list,
				     MODULE_STATE_LIVE, mod);

	/* We need to finish all async code before the module init sequence is done */
	async_synchronize_full();

	mutex_lock(&module_mutex);
	/* Drop initial reference. */
	module_put(mod);
	trim_init_extable(mod);
#ifdef CONFIG_KALLSYMS
	/*
	 * Point the symbol and string tables at the core_* copies; the
	 * init region is released just below.
	 */
	mod->num_symtab = mod->core_num_syms;
	mod->symtab = mod->core_symtab;
	mod->strtab = mod->core_strtab;
#endif
	/* Release the init region and clear every field that described it. */
	unset_module_init_ro_nx(mod);
	module_free(mod, mod->module_init);
	mod->module_init = NULL;
	mod->init_size = 0;
	mod->init_ro_size = 0;
	mod->init_text_size = 0;
	mutex_unlock(&module_mutex);

	return 0;
}

這是一個用宏定義的函數,展開後函數名就是sys_init_module,參數有3個,umod是ko文件在用戶空間的首地址,len是ko文件的大小,uargs是用戶空間的參數指針。進入這個函數後先check一下permission,然後調用load_module(),加載ko模塊的工作主要就是在load_module()這個函數中完成的。

/*
 * load_module - build a ready-to-initialise module from a userspace image.
 * @umod:  user pointer to the module image
 * @len:   size of the image in bytes
 * @uargs: user pointer to the module argument string
 *
 * Copies the image in, lays it out and moves it to its final location,
 * resolves symbols, applies relocations, links the module into the global
 * module list and sysfs, and parses its arguments.
 *
 * Returns the module on success or an ERR_PTR() value on failure.  On
 * failure everything set up so far is unwound through the labels at the
 * bottom of the function, in reverse order of setup.
 */
static struct module *load_module(void __user *umod,
				  unsigned long len,
				  const char __user *uargs)
{
	struct load_info info = { NULL, };
	struct module *mod;
	long err;

	pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n",
	       umod, len, uargs);
	printk(KERN_ERR "load_module: umod=%p, len=%lu, uargs=%p\n",
	       umod, len, uargs);

	/* Copy in the blobs from userspace, check they are vaguely sane. */
	err = copy_and_check(&info, umod, len, uargs);
	if (err)
		return ERR_PTR(err);

	/* Figure out module layout, and allocate all the memory. */
	mod = layout_and_allocate(&info);
	if (IS_ERR(mod)) {
		err = PTR_ERR(mod);
		goto free_copy;
	}
	/* Debug trace: only touch mod once it is known not to be an ERR_PTR. */
	printk(KERN_ERR "..mod->init: %p\n", mod->init);

	/* Now module is in final location, initialize linked lists, etc. */
	err = module_unload_init(mod);
	if (err)
		goto free_module;

	/* Now we've got everything in the final locations, we can
	 * find optional sections. */
	find_module_sections(mod, &info);

	err = check_module_license_and_versions(mod);
	if (err)
		goto free_unload;

	/* Set up MODINFO_ATTR fields */
	setup_modinfo(mod, &info);

	/* Fix up syms, so that st_value is a pointer to location. */
	printk(KERN_ERR "---------------------------------------\n");

	err = simplify_symbols(mod, &info);
	if (err < 0)
		goto free_modinfo;
	printk(KERN_ERR "11..mod->init: %p\n", mod->init);
	err = apply_relocations(mod, &info);
	printk(KERN_ERR "22..mod->init: %p\n", mod->init);
	if (err < 0)
		goto free_modinfo;

	err = post_relocation(mod, &info);
	if (err < 0)
		goto free_modinfo;

	flush_module_icache(mod);
	/* Now copy in args */
	mod->args = strndup_user(uargs, ~0UL >> 1);
	if (IS_ERR(mod->args)) {
		err = PTR_ERR(mod->args);
		goto free_arch_cleanup;
	}

	/* Mark state as coming so strong_try_module_get() ignores us. */
	mod->state = MODULE_STATE_COMING;

	/* Now sew it into the lists so we can get lockdep and oops
	 * info during argument parsing.  No one should access us, since
	 * strong_try_module_get() will fail.
	 * lockdep/oops can run asynchronous, so use the RCU list insertion
	 * function to insert in a way safe to concurrent readers.
	 * The mutex protects against concurrent writers.
	 */
	mutex_lock(&module_mutex);
	if (find_module(mod->name)) {
		err = -EEXIST;
		goto unlock;
	}

	/* This has to be done once we're sure module name is unique. */
	dynamic_debug_setup(info.debug, info.num_debug);

	/* Find duplicate symbols */
	err = verify_export_symbols(mod);
	if (err < 0)
		goto ddebug;

	module_bug_finalize(info.hdr, info.sechdrs, mod);
	list_add_rcu(&mod->list, &modules);
	mutex_unlock(&module_mutex);

	/* Module is ready to execute: parsing args may do that. */
	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
			 -32768, 32767, NULL);
	if (err < 0)
		goto unlink;

	/* Link in to syfs. */
	err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp);
	if (err < 0)
		goto unlink;

	/* Get rid of temporary copy. */
	free_copy(&info);

	/* Done! */
	trace_module_load(mod);
	return mod;

 unlink:
	mutex_lock(&module_mutex);
	/* Unlink carefully: kallsyms could be walking list. */
	list_del_rcu(&mod->list);
	module_bug_cleanup(mod);

 ddebug:
	dynamic_debug_remove(info.debug);
 unlock:
	mutex_unlock(&module_mutex);
	synchronize_sched();
	kfree(mod->args);
 free_arch_cleanup:
	module_arch_cleanup(mod);
 free_modinfo:
	free_modinfo(mod);
 free_unload:
	module_unload_free(mod);
 free_module:
	module_deallocate(mod, &info);
 free_copy:
	free_copy(&info);
	return ERR_PTR(err);
}

進入load_module()後定義了兩個重要的變量:

	struct load_info info = { NULL, };
	struct module *mod;

其中info是一個struct load_info結構體,這個結構體主要保存了ELF文件的一些基本信息:

/* Scratch description of the module image while it is being loaded. */
struct load_info {
	Elf_Ehdr *hdr;			/* temporary copy of the image (ELF header first) */
	unsigned long len;		/* length of that copy in bytes */
	Elf_Shdr *sechdrs;		/* section header table inside the copy */
	char *secstrings;		/* section-name string table */
	char *strtab;			/* string table linked from the symbol table */
	unsigned long symoffs;		/* offset reserved for the core symbols */
	unsigned long stroffs;		/* offset reserved for the core symbol strings */
	struct _ddebug *debug;		/* handed to dynamic_debug_setup() */
	unsigned int num_debug;		/* number of entries behind debug */
	/* Section-table indices of the sections the loader looks up by name/type. */
	struct {
		unsigned int sym;
		unsigned int str;
		unsigned int mod;
		unsigned int vers;
		unsigned int info;
		unsigned int pcpu;
	} index;
};

hdr是ELF文件頭的指針,len是文件長度,sechdrs是段表指針,secstrings 和 strtab 分別是段表字符串表和字符串表的首地址。index結構體裏保存的是一些段在段表裏的索引號,看到有個mod段了吧,這個mod其實就是上面提到過的.gnu.linkonce.this_module段在段表中的下標。

struct module這個結構體的內容和.gnu.linkonce.this_module段的內容是一一對應的,定義有點複雜,用到的時候再看。

接着load_module()調用copy_and_check(),copy_and_check函數主要是vmalloc一塊臨時的內核空間,將用戶空間的ko文件整個複製進去,然後再檢查一下文件是否爲ELF類型、文件長度有沒有超過段表尾端等等。最後將ELF文件的一些信息賦值給info。

/*
 * copy_and_check - copy the module image into kernel memory and sanity
 * check its ELF header.
 * @info:  filled in with the copy (hdr) and its length (len) on success
 * @umod:  user pointer to the image
 * @len:   size of the image in bytes
 * @uargs: unused here
 *
 * Returns 0 on success, -ENOEXEC if the image is too short, is not a
 * relocatable ELF object for this architecture or its section header table
 * does not fit, -ENOMEM if the copy cannot be allocated, -EFAULT if the
 * user memory cannot be read.  The copy is freed again on every error.
 */
static int copy_and_check(struct load_info *info,
			  const void __user *umod, unsigned long len,
			  const char __user *uargs)
{
	int err;
	Elf_Ehdr *hdr;

	if (len < sizeof(*hdr))
		return -ENOEXEC;

	/* Suck in entire file: we'll want most of it. */
	hdr = vmalloc(len);
	if (hdr == NULL)
		return -ENOMEM;

	if (copy_from_user(hdr, umod, len) != 0) {
		err = -EFAULT;
		goto free_hdr;
	}

	/* Sanity checks against insmoding binaries or wrong arch,
	   weird elf version */
	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
	    || hdr->e_type != ET_REL
	    || !elf_check_arch(hdr)
	    || hdr->e_shentsize != sizeof(Elf_Shdr)) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	/*
	 * The section header table must lie inside the image.  Compare
	 * against the room left after e_shoff instead of summing, so that a
	 * huge e_shoff cannot wrap the addition and slip past the check.
	 */
	if (hdr->e_shoff > len
	    || hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	info->hdr = hdr;
	info->len = len;
	printk(KERN_ERR "temp space hdr: %p\n", hdr);
	return 0;

free_hdr:
	vfree(hdr);
	return err;
}

接着load_module()調用layout_and_allocate()函數,這個函數主要任務是決定ko文件中哪些段需要爲其分配地址空間,併爲ko文件中的每個需要加載的段計算並分配虛擬地址,也就是運行時地址。前面說過,ko文件是類似於.o目標文件的,所以它的每個段的虛擬地址就像目標文件一樣,都爲0,因此需要鏈接後才能運行。分配每個段的運行時地址就是鏈接的第一步。


/*
 * layout_and_allocate - decide where every section will live and move the
 * module there.
 * @info: load state; its section headers are updated along the way
 *
 * Returns the module structure inside the final copy on success, or an
 * ERR_PTR() value on failure.
 */
static struct module *layout_and_allocate(struct load_info *info)
{
	/* Module within temporary copy. */
	struct module *mod;
	Elf_Shdr *pcpusec;
	int err;

	mod = setup_load_info(info);
	if (IS_ERR(mod))
		return mod;
	printk(KERN_ERR "mod name = %s\n", mod->name);
	/* mod->init is a function pointer, not a string: print it with %p. */
	printk(KERN_ERR "mod init = %p\n", mod->init);
	err = check_modinfo(mod, info);
	if (err)
		return ERR_PTR(err);

	/* Allow arches to frob section contents and sizes.  */
	err = module_frob_arch_sections(info->hdr, info->sechdrs,
					info->secstrings, mod);
	if (err < 0)
		goto out;

	pcpusec = &info->sechdrs[info->index.pcpu];
	printk(KERN_ERR "pcpusec size: %lu\n",
	       (unsigned long)pcpusec->sh_size);
	if (pcpusec->sh_size) {
		/* We have a special allocation for this section. */
		err = percpu_modalloc(mod,
				      pcpusec->sh_size, pcpusec->sh_addralign);
		if (err)
			goto out;
		/* Keep the generic layout from placing this section again. */
		pcpusec->sh_flags &= ~(unsigned long)SHF_ALLOC;
	}

	/* Determine total sizes, and put offsets in sh_entsize.  For now
	   this is done generically; there doesn't appear to be any
	   special cases for the architectures. */
	layout_sections(mod, info);
	layout_symtab(mod, info);

	/* Allocate and move to the final place */
	err = move_module(mod, info);
	if (err)
		goto free_percpu;

	/* Module has been copied to its final place now: return it. */
	mod = (void *)info->sechdrs[info->index.mod].sh_addr;
	kmemleak_load_module(mod, info);
	return mod;

free_percpu:
	percpu_modfree(mod);
out:
	return ERR_PTR(err);
}

layout_and_allocate()函數先調用setup_load_info()對info進一步初始化,接着調用layout_sections()分配各個段在最終虛擬地址上的偏移,.init段會被單獨分配偏移,因爲.init段的虛擬地址是單獨分配的,後面將詳述。然後調用layout_symtab()分配符號表和字符串表在虛擬地址上的偏移。最後調用move_module()將段移動到最終的虛擬地址上去。


setup_load_info()函數繼續用ELF信息去初始化info結構體,並調用rewrite_section_headers()。

/*
 * setup_load_info - fill in the convenience pointers and section indices
 * of @info from the temporary image copy.
 *
 * Returns a pointer to the module structure inside the temporary copy
 * (the contents of .gnu.linkonce.this_module), or an ERR_PTR() value when
 * the section headers are bad, that section or the symbol table is missing,
 * or the module structure version check fails.
 */
static struct module *setup_load_info(struct load_info *info)
{
	unsigned int i;
	int err;
	struct module *mod;

	/* Set up the convenience variables */
	info->sechdrs = (void *)info->hdr + info->hdr->e_shoff;
	info->secstrings = (void *)info->hdr
		+ info->sechdrs[info->hdr->e_shstrndx].sh_offset;

	err = rewrite_section_headers(info);
	if (err)
		return ERR_PTR(err);

	/* Find internal symbols and strings. */
	for (i = 1; i < info->hdr->e_shnum; i++) {
		if (info->sechdrs[i].sh_type == SHT_SYMTAB) {
			info->index.sym = i;
			info->index.str = info->sechdrs[i].sh_link;
			info->strtab = (char *)info->hdr
				+ info->sechdrs[info->index.str].sh_offset;
			break;
		}
	}

	info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
	if (!info->index.mod) {
		printk(KERN_WARNING "No module found in object\n");
		return ERR_PTR(-ENOEXEC);
	}
	/* This is temporary: point mod into copy of data. */
	mod = (void *)info->sechdrs[info->index.mod].sh_addr;
	printk(KERN_ERR "temp mod: %p, &mod->init: %p\n", mod, &(mod->init));
	if (info->index.sym == 0) {
		printk(KERN_WARNING "%s: module has no symbols (stripped?)\n",
		       mod->name);
		return ERR_PTR(-ENOEXEC);
	}

	info->index.pcpu = find_pcpusec(info);

	/* Check module struct version now, before we try to use module. */
	if (!check_modstruct_version(info->sechdrs, info->index.vers, mod))
		return ERR_PTR(-ENOEXEC);

	return mod;
}

rewrite_section_headers()這個函數將每個段的虛擬地址(sh_addr)暫時設置爲其在臨時空間中的地址。並將.modinfo段(info)和__versions段(vers)的SHF_ALLOC標誌清零,表示不爲這兩個段分配運行時空間。

/*
 * rewrite_section_headers - point every section header at its data inside
 * the temporary image and drop the sections that must not be loaded.
 *
 * Each sh_addr is set to the section's address in the temporary copy.
 * SHF_ALLOC is cleared on .modinfo and __versions (and on .exit* sections
 * when CONFIG_MODULE_UNLOAD is off) so they are not given space later.
 *
 * Returns 0, or -ENOEXEC when a section with file contents reaches past
 * the end of the image.
 */
static int rewrite_section_headers(struct load_info *info)
{
	unsigned int i;

	/* This should always be true, but let's be sure. */
	info->sechdrs[0].sh_addr = 0;

	for (i = 1; i < info->hdr->e_shnum; i++) {
		Elf_Shdr *shdr = &info->sechdrs[i];

		/*
		 * Bounds-check without adding offset and size, so that a
		 * wrapping sum cannot make a bogus section look in-range.
		 */
		if (shdr->sh_type != SHT_NOBITS
		    && (shdr->sh_offset > info->len
			|| shdr->sh_size > info->len - shdr->sh_offset)) {
			printk(KERN_ERR "Module len %lu truncated\n",
			       info->len);
			return -ENOEXEC;
		}

		/* Mark all sections sh_addr with their address in the
		   temporary image. */
		shdr->sh_addr = (size_t)info->hdr + shdr->sh_offset;
		printk(KERN_ERR "section:%s sh_addr: %lx\n",
		       info->secstrings + shdr->sh_name,
		       (unsigned long)shdr->sh_addr);
#ifndef CONFIG_MODULE_UNLOAD
		/* Don't load .exit sections */
		if (strstarts(info->secstrings+shdr->sh_name, ".exit"))
			shdr->sh_flags &= ~(unsigned long)SHF_ALLOC;
#endif
	}

	/* Track but don't keep modinfo and version sections. */
	info->index.vers = find_sec(info, "__versions");
	info->index.info = find_sec(info, ".modinfo");
	info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
	info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
	return 0;
}


rewrite_section_headers()返回後,將符號表和字符串表的信息記錄在info中。

	info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
這裏將.gnu.linkonce.this_module段在段表中的下標記錄在info->index.mod中。

        mod = (void *)info->sechdrs[info->index.mod].sh_addr;
現在,mod指針就指向臨時空間中的.gnu.linkonce.this_module段的地址了,而.gnu.linkonce.this_module段的內容是編譯器生成的並初始化的,因此struct  module這個結構體的初始值相當於編譯時就設置好了。不信可以打印mod->name看看,就是KO文件的文件名。

好了,現在程序執行完setup_load_info()返回到layout_and_allocate(),接着layout_and_allocate()調用layout_sections()。

/*
 * layout_sections - assign every SHF_ALLOC section an offset inside the
 * core or the init region and accumulate the region sizes in @mod.
 *
 * All sh_entsize fields are first set to ~0UL, meaning "not placed yet".
 * Sections are then placed in four passes selected by masks[] (flags that
 * must be set / flags that must be clear).  The first set of passes skips
 * sections whose name starts with ".init" and fills the core region; the
 * second set takes only those sections, fills the init region and tags the
 * stored offset with INIT_OFFSET_MASK.  After passes 0, 1 and 3 the running
 * size is passed through debug_align() and recorded as the text, read-only
 * and total size respectively.
 */
static void layout_sections(struct module *mod, struct load_info *info)
{
	static unsigned long const masks[][2] = {
		/* NOTE: all executable code must be the first section
		 * in this array; otherwise modify the text_size
		 * finder in the two loops below */
		{ SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL },
		{ SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL },
		{ SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL },
		{ ARCH_SHF_SMALL | SHF_ALLOC, 0 }
	};
	unsigned int m, i;

	for (i = 0; i < info->hdr->e_shnum; i++)
		info->sechdrs[i].sh_entsize = ~0UL;

	pr_debug("Core section allocation order:\n");
	printk(KERN_ERR "masks array size: %zu\n", ARRAY_SIZE(masks));
	for (m = 0; m < ARRAY_SIZE(masks); ++m) {
		for (i = 0; i < info->hdr->e_shnum; ++i) {
			Elf_Shdr *s = &info->sechdrs[i];
			const char *sname = info->secstrings + s->sh_name;

			printk(KERN_ERR "sname : %s\n", sname);
			if ((s->sh_flags & masks[m][0]) != masks[m][0]
			    || (s->sh_flags & masks[m][1])
			    || s->sh_entsize != ~0UL
			    || strstarts(sname, ".init")) {
				printk(KERN_ERR "skip..\n");
				continue;
			}
			printk(KERN_ERR "core_size: %u\n", mod->core_size);
			s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
			printk(KERN_ERR "sh_entsize: %lu, core_size: %u\n",
			       (unsigned long)s->sh_entsize, mod->core_size);
			pr_debug("\t%s\n", sname);
		}
		switch (m) {
		case 0: /* executable */
			mod->core_size = debug_align(mod->core_size);
			printk(KERN_ERR "case 0 core_size = %u\n",
			       mod->core_size);
			mod->core_text_size = mod->core_size;
			break;
		case 1: /* RO: text and ro-data */
			mod->core_size = debug_align(mod->core_size);
			printk(KERN_ERR "case 1 core_size = %u\n",
			       mod->core_size);
			mod->core_ro_size = mod->core_size;
			break;
		case 3: /* whole core */
			mod->core_size = debug_align(mod->core_size);
			break;
		}
	}
	printk(KERN_ERR "parse init..\n");
	pr_debug("Init section allocation order:\n");
	for (m = 0; m < ARRAY_SIZE(masks); ++m) {
		for (i = 0; i < info->hdr->e_shnum; ++i) {
			Elf_Shdr *s = &info->sechdrs[i];
			const char *sname = info->secstrings + s->sh_name;

			printk(KERN_ERR "sname : %s\n", sname);
			if ((s->sh_flags & masks[m][0]) != masks[m][0]
			    || (s->sh_flags & masks[m][1])
			    || s->sh_entsize != ~0UL
			    || !strstarts(sname, ".init")) {
				printk(KERN_ERR "skip..\n");
				continue;
			}
			printk(KERN_ERR "init_size: %u\n", mod->init_size);
			s->sh_entsize = (get_offset(mod, &mod->init_size, s, i)
					 | INIT_OFFSET_MASK);
			printk(KERN_ERR "sh_entsize: %lu, init_size: %u\n",
			       (unsigned long)s->sh_entsize, mod->init_size);
			pr_debug("\t%s\n", sname);
		}
		switch (m) {
		case 0: /* executable */
			mod->init_size = debug_align(mod->init_size);
			mod->init_text_size = mod->init_size;
			break;
		case 1: /* RO: text and ro-data */
			mod->init_size = debug_align(mod->init_size);
			mod->init_ro_size = mod->init_size;
			break;
		case 3: /* whole init */
			mod->init_size = debug_align(mod->init_size);
			break;
		}
	}
}

layout_sections()利用了struct module 裏的兩個成員變量:core_size 和 init_size,後面會看到,kernel爲ko文件分配最終虛擬地址的時候,實際上分配了兩塊地址,一塊叫core,另一塊叫init, 這兩個變量分別記錄了這兩塊地址的size。一個內核模塊爲什麼要分配兩塊地址呢?這是考慮到內核模塊的__init函數只運行一次,所以將它單獨放在一塊內存中,可以方便在運行結束後回收這塊內存。__init函數就是用 __init 宏修飾的函數, #define __init        __section(.init.text) ,編譯器會將它放入ko文件的.init.text段中。

第一個for循環將所有段的sh_entsize設置爲一個特殊值——~0UL(所有位都爲1,在32位系統上就是0xffffffff)。這是個標記,凡是sh_entsize等於這個值的段,就是還未被分配虛擬空間偏移的段。

前面說了,爲ko文件分配的最終虛擬地址有兩塊,core空間和init空間,core_size和init_size記錄了這兩個空間的size,初始值爲0。

第二個for循環爲所有具有SHF_ALLOC標誌,並且非.init的段分配其在core虛擬空間的偏移,這些段後面將會被複制到core虛擬空間,這是不會被自動釋放,常駐內核的空間。

	s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
每個段在core空間的偏移記錄在sh_entsize中,偏移是通過get_offset得到的,這個函數很簡單,arch_mod_section_prepend()函數爲每個段額外的分配幾個byte,可以先認爲返回0,第一次調用傳入的*size爲0,ALIGN()宏是對齊用的宏,一般仍然返回*size,所以可以簡化爲ret = *size; 接着將core_size加上這個段的size,返回。後面再爲下一個段分配偏移的時候,core_size已經非0了,分配的偏移就是*size的大小,每次分配core_size都增加相應的段的size。

/*
 * get_offset - reserve room for one section at the end of a region.
 * @mod:     module being laid out (passed to the arch hook)
 * @size:    running size of the region; updated to end just past the section
 * @sechdr:  header of the section being placed
 * @section: index of that section in the section table
 *
 * *@size is first increased by arch_mod_section_prepend(), then rounded up
 * to the section's alignment (an sh_addralign of 0 is treated as 1).
 * Returns the aligned offset at which the section starts.
 */
static long get_offset(struct module *mod, unsigned int *size,
		       Elf_Shdr *sechdr, unsigned int section)
{
	long ret;

	*size += arch_mod_section_prepend(mod, section);
	ret = ALIGN(*size, sechdr->sh_addralign ?: 1);
	*size = ret + sechdr->sh_size;
	printk(KERN_ERR "sh_addralign: %lu, ret: %ld, sh_size: %lu\n",
	       (unsigned long)sechdr->sh_addralign, ret,
	       (unsigned long)sechdr->sh_size);
	return ret;
}

當第二個for循環完畢,第三個for循環就爲.init段分配其在init虛擬空間的偏移,分配方法和前面一樣,然後返回layout_and_allocate()函數。layout_and_allocate()函數接着調用layout_symtab()爲符號表和字符串表分配虛擬空間。symsect和strsect分別是表示符號表和字符串表的段描述符。符號表和字符串表會在core空間與init空間同時分配。


static void layout_symtab(struct module *mod, struct load_info *info)
{
	Elf_Shdr *symsect = info->sechdrs + info->index.sym;
	Elf_Shdr *strsect = info->sechdrs + info->index.str;
	const Elf_Sym *src;
	unsigned int i, nsrc, ndst, strtab_size;

	/* Put symbol section at end of init part of module. */
	symsect->sh_flags |= SHF_ALLOC;
	symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
					 info->index.sym) | INIT_OFFSET_MASK;
        printk(KERN_ERR "symsect->sh_entsize: %d\n", symsect->sh_entsize);
	pr_debug("\t%s\n", info->secstrings + symsect->sh_name);

	src = (void *)info->hdr + symsect->sh_offset;
	nsrc = symsect->sh_size / sizeof(*src);
        printk(KERN_ERR "symb num : %d\n", nsrc);

	/* strtab always starts with a nul, so offset 0 is the empty string. */
	strtab_size = 1;

	/* Compute total space required for the core symbols' strtab. */
	for (ndst = i = 0; i < nsrc; i++) {
		if (i == 0 ||
		    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
			strtab_size += strlen(&info->strtab[src[i].st_name])+1;
			ndst++;
		}
	}

	/* Append room for core symbols at end of core part. */
	info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
	info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
	mod->core_size += strtab_size;

	/* Put string table section at end of init part of module. */
	strsect->sh_flags |= SHF_ALLOC;
	strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
					 info->index.str) | INIT_OFFSET_MASK;
	pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
}

先爲符號表在init空間分配偏移:

	symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
					 info->index.sym) | INIT_OFFSET_MASK;

接着爲“core符號”及其對應的字符串在core空間分配偏移,其實就是隻將部分符號表在core空間分配偏移,遍歷符號表,對每個符號表項調用is_core_symbol()函數判斷是否爲“core符號”,如果是,爲core符號對應的字符串分配空間,字符串空間記錄在strtab_size中。

	src = (void *)info->hdr + symsect->sh_offset;
	nsrc = symsect->sh_size / sizeof(*src);
        printk(KERN_ERR "symb num : %d\n", nsrc);

	/* strtab always starts with a nul, so offset 0 is the empty string. */
	strtab_size = 1;

	/* Compute total space required for the core symbols' strtab. */
	for (ndst = i = 0; i < nsrc; i++) {
		if (i == 0 ||
		    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
			strtab_size += strlen(&info->strtab[src[i].st_name])+1;
			ndst++;
		}
	}

下面這三句就爲core空間的符號表與字符串表分配好了偏移。(注意:分配的偏移沒有記錄在sh_entsize中,只是記錄在info結構體的symoffs和stroffs中,也就是說這裏只是在core空間爲core符號及其字符串預留好位置,後面move_module()搬移各個段的時候,並不會把符號表、字符串表複製到core空間來)

	info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
	info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
	mod->core_size += strtab_size;

最後爲字符串表分配init空間的偏移。

	strsect->sh_flags |= SHF_ALLOC;
	strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
					 info->index.str) | INIT_OFFSET_MASK;

返回layout_and_allocate()函數。調用move_module函數進行虛擬空間的實際申請,和段的加載操作。

/*
 * move_module - allocate the core and init regions and copy every
 * SHF_ALLOC section from the temporary image to its final address.
 *
 * Both regions are zeroed first.  A section whose sh_entsize carries
 * INIT_OFFSET_MASK goes into the init region, every other one into the core
 * region; SHT_NOBITS sections get space but no data is copied for them.
 * Each section's sh_addr is updated to the final address.
 *
 * Returns 0 on success or -ENOMEM if either allocation fails.
 */
static int move_module(struct module *mod, struct load_info *info)
{
	int i;
	void *ptr;

	/* Do the allocs. */
	ptr = module_alloc_update_bounds(mod->core_size);
	/*
	 * The pointer to this block is stored in the module structure
	 * which is inside the block. Just mark it as not being a
	 * leak.
	 */
	kmemleak_not_leak(ptr);
	if (!ptr)
		return -ENOMEM;

	memset(ptr, 0, mod->core_size);
	mod->module_core = ptr;

	ptr = module_alloc_update_bounds(mod->init_size);
	/*
	 * The pointer to this block is stored in the module structure
	 * which is inside the block. This block doesn't need to be
	 * scanned as it contains data and code that will be freed
	 * after the module is initialized.
	 */
	kmemleak_ignore(ptr);
	if (!ptr && mod->init_size) {
		module_free(mod, mod->module_core);
		return -ENOMEM;
	}
	memset(ptr, 0, mod->init_size);
	mod->module_init = ptr;

	/* Transfer each section which specifies SHF_ALLOC */
	pr_debug("final section addresses:\n");
	for (i = 0; i < info->hdr->e_shnum; i++) {
		void *dest;
		Elf_Shdr *shdr = &info->sechdrs[i];

		if (!(shdr->sh_flags & SHF_ALLOC))
			continue;

		/* sh_entsize holds the offset chosen during layout. */
		if (shdr->sh_entsize & INIT_OFFSET_MASK)
			dest = mod->module_init
				+ (shdr->sh_entsize & ~INIT_OFFSET_MASK);
		else
			dest = mod->module_core + shdr->sh_entsize;
		printk(KERN_ERR "name: %s, dest : %p\n",
		       info->secstrings + shdr->sh_name, dest);
		if (shdr->sh_type != SHT_NOBITS)
			memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
		/* Update sh_addr to point to copy in image. */
		shdr->sh_addr = (unsigned long)dest;
		pr_debug("\t0x%lx %s\n",
			 (long)shdr->sh_addr, info->secstrings + shdr->sh_name);
	}

	return 0;
}

先爲core空間申請一塊大小爲core_size的內存,將其首地址賦值給struct module結構體的module_core成員:

    ptr = module_alloc_update_bounds(mod->core_size);

	mod->module_core = ptr;
再爲init空間申請一塊大小爲init_size的內存,將其首地址賦值給struct module結構體的module_init成員:

	ptr = module_alloc_update_bounds(mod->init_size);
	mod->module_init = ptr;
下面的for循環對每個有SHF_ALLOC標記的段分配絕對虛擬地址(前面分配的只是各個段相對於未來要分配的虛擬地址的偏移,也就是相對於module_core和module_init的偏移)。分配絕對虛擬地址很簡單,將申請的虛擬空間的地址(分別保存在module_core和module_init中)直接加上之前分配好的偏移量就行了。如下:(符號表和字符串表的絕對虛擬地址都被分配到了init空間內,所以後面搬移的時候是把這兩個表搬移到了init空間而非core空間)

		if (shdr->sh_entsize & INIT_OFFSET_MASK)
			dest = mod->module_init
				+ (shdr->sh_entsize & ~INIT_OFFSET_MASK);
		else
			dest = mod->module_core + shdr->sh_entsize;
開始段的搬移,將段從臨時內核空間,搬移到運行時的虛擬地址上去:

		if (shdr->sh_type != SHT_NOBITS)
			memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
最後把絕對虛擬地址賦值給相應段表項的sh_addr成員。返回。

到目前爲止,ko文件中需要加載的段,已經被加載進了內核空間,每個被加載的段都有了自己的運行時地址。但現在還不能運行,因爲代碼中對符號的引用還沒有修正,也就是還沒有進行符號重定位。前面說過ko文件類似於.o文件,.o文件對全局符號的引用都是待重定位的,是需要鏈接器對符號進行鏈接的,ko文件也一樣。只不過ko文件代碼中對符號的引用是由內核來進行重定位的。

move_module()返回後,返回到layout_and_allocate()函數中。layout_and_allocate()函數最後將mod指針變量重新指向搬移後的.gnu.linkonce.this_module段的虛擬地址值。

	mod = (void *)info->sechdrs[info->index.mod].sh_addr;

現在layout_and_allocate()函數執行完了,返回到load_module函數繼續往下執行,

	find_module_sections(mod, &info);
這主要是對info進一步初始化。

接着調用simplify_symbols()函數,這個函數將符號表裏的符號的絕對地址寫入到st_value域中(符號表、字符串表現在都在init空間了)。

for循環遍歷init空間的符號表,分析每個符號表項的st_shndx域,st_shndx通常表示符號所在的段,但它有三個特殊值:SHN_ABS,SHN_COMMON,SHN_UNDEF。所以函數中分了4種case來進行處理。SHN_COMMON和SHN_ABS的case先不考慮,因爲我們的代碼中的符號主要分兩種,模塊內的符號和內核export出的符號(如printk),這兩種符號的st_shndx大部分對應default和SHN_UNDEF這兩種case。

對於模塊內的符號,程序進入default進行處理,處理很簡單,st_value = st_value + 符號所在段的絕對虛擬地址(st_value中原本保存着符號在其所在段的offset)。這樣一來,st_value中現在保存的就是符號的絕對虛擬地址了。

對於內核導出的符號,由於它在模塊中沒有定義,所以它的st_shndx爲SHN_UNDEF。對於SHN_UNDEF 這種case的處理過程如下:

1:調用resolve_symbol_wait()函數解析內核符號,這個函數返回一個struct kernel_symbol結構體。

2:將這個結構體的value成員直接賦值給st_value。

結構體定義在include/linux/export.h中:

/* One exported symbol, as stored in the export tables searched by name. */
struct kernel_symbol {
	unsigned long value;	/* copied into st_value when a module imports it */
	const char *name;	/* name of the exported symbol */
};

/*
 * simplify_symbols - turn every symbol's st_value into a final address.
 *
 * Walks the module's symbol table (skipping entry 0) and handles each
 * symbol according to st_shndx:
 *   SHN_COMMON  - rejected with -ENOEXEC;
 *   SHN_ABS     - left untouched;
 *   SHN_UNDEF   - resolved through resolve_symbol_wait(); an unresolved
 *                 weak symbol is tolerated, anything else is an error;
 *   otherwise   - st_value is an offset into its section, so the section's
 *                 address (or the per-cpu base) is added to it.
 *
 * The walk continues after an error.  Returns 0 or the last error recorded.
 */
static int simplify_symbols(struct module *mod, const struct load_info *info)
{
	Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
	Elf_Sym *sym = (void *)symsec->sh_addr;
	unsigned long secbase;
	unsigned int i;
	int ret = 0;
	const struct kernel_symbol *ksym;

	for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) {
		const char *name = info->strtab + sym[i].st_name;

		printk(KERN_ERR "symb name: %s\n", name);
		switch (sym[i].st_shndx) {
		case SHN_COMMON:
			/* We compiled with -fno-common.  These are not
			   supposed to happen.  */
			pr_debug("Common symbol: %s\n", name);
			printk("%s: please compile with -fno-common\n",
			       mod->name);
			ret = -ENOEXEC;
			break;

		case SHN_ABS:
			/* Don't need to do anything */
			pr_debug("Absolute symbol: 0x%08lx\n",
			       (long)sym[i].st_value);
			break;

		case SHN_UNDEF:
			printk(KERN_ERR "Undefine symb!!\n");
			ksym = resolve_symbol_wait(mod, info, name);
			printk(KERN_ERR "resolve ok!\n");
			/* Ok if resolved.  */
			if (ksym && !IS_ERR(ksym)) {
				sym[i].st_value = ksym->value;
				break;
			}

			/* Ok if weak.  */
			if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK)
				break;

			printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n",
			       mod->name, name, PTR_ERR(ksym));
			ret = PTR_ERR(ksym) ?: -ENOENT;
			break;

		default:
			/* Divert to percpu allocation if a percpu var. */
			if (sym[i].st_shndx == info->index.pcpu)
				secbase = (unsigned long)mod_percpu(mod);
			else
				secbase = info->sechdrs[sym[i].st_shndx].sh_addr;
			printk(KERN_ERR "section base: %lx\n", secbase);
			sym[i].st_value += secbase;
			printk(KERN_ERR "st_value: %lx\n",
			       (unsigned long)sym[i].st_value);
			break;
		}
	}

	return ret;
}

resolve_symbol_wait()函數用來解析內核導出的符號。不是所有的內核符號都默認導出的,默認內核中的符號在運行時是對外“不可見的”,而內核本身對那些符號地址的引用,都是靜態編譯鏈接內核時,鏈接器寫進去的。所以外部模塊無法得到內核符號的地址。如果外部模塊想要引用內核符號,除非內核將符號地址導出來!內核中的符號可以通過EXPORT_SYMBOL()宏來導出,這個宏就是將符號信息保存在一個struct kernel_symbol結構體中,再將這個結構體編譯進內核的一個特殊段,以後如果外部想引用這個符號,只需要在這個段中尋找對應的符號的struct kernel_symbol結構體就行了。更詳細的細節google上有很多。

現在來看這個resolve_symbol_wait()函數:

/*
 * Resolve @name for @mod, retrying for as long as resolve_symbol() reports
 * -EBUSY.  The wait is interruptible and is bounded by 30 * HZ; when it
 * ends without a usable answer a warning naming the owning module is
 * printed.  Whatever resolve_symbol() returned last is handed back.
 */
static const struct kernel_symbol *
resolve_symbol_wait(struct module *mod,
		    const struct load_info *info,
		    const char *name)
{
	char owner[MODULE_NAME_LEN];
	const struct kernel_symbol *ksym;
	long remaining;

	remaining = wait_event_interruptible_timeout(module_wq,
			!IS_ERR(ksym = resolve_symbol(mod, info, name, owner))
			|| PTR_ERR(ksym) != -EBUSY,
					     30 * HZ);
	if (remaining <= 0)
		printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n",
		       mod->name, owner);

	return ksym;
}

resolve_symbol_wait()函數會先調用resolve_symbol()解析內核導出符號。

因此進入resolve_symbol()函數:

static const struct kernel_symbol *resolve_symbol(struct module *mod,
						  const struct load_info *info,
						  const char *name,
						  char ownername[])
{
	struct module *owner;
	const struct kernel_symbol *sym;
	const unsigned long *crc;
	int err;
    printk(KERN_ERR "resolve symbol: %s...\n", name);
	mutex_lock(&module_mutex);
	sym = find_symbol(name, &owner, &crc,
			  !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true);
	if (!sym)
		goto unlock;

	if (!check_version(info->sechdrs, info->index.vers, name, mod, crc,
			   owner)) {
		sym = ERR_PTR(-EINVAL);
		goto getname;
	}

	err = ref_module(mod, owner);
	if (err) {
		sym = ERR_PTR(err);
		goto getname;
	}

getname:
	/* We must make copy under the lock if we failed to get ref. */
	strncpy(ownername, module_name(owner), MODULE_NAME_LEN);
unlock:
	mutex_unlock(&module_mutex);
	return sym;
}

resolve_symbol()函數繼續調用find_symbol()函數,返回一個struct kernel_symbol結構指針。

/*
 * Search every exported-symbol table for @name.
 *
 * @gplok and @warn are passed through to the per-table check.  On a hit the
 * symbol is returned and, when the pointers are non-NULL, *@owner and *@crc
 * receive the values recorded by the search.  Returns NULL on a miss.
 */
const struct kernel_symbol *find_symbol(const char *name,
					struct module **owner,
					const unsigned long **crc,
					bool gplok,
					bool warn)
{
	struct find_symbol_arg query;

	query.name = name;
	query.gplok = gplok;
	query.warn = warn;

	if (!each_symbol_section(find_symbol_in_section, &query)) {
		pr_debug("Failed to find symbol %s\n", name);
		return NULL;
	}

	if (owner)
		*owner = query.owner;
	if (crc)
		*crc = query.crc;
	return query.sym;
}

進入find_symbol()函數,函數內部先構造一個搜索符號的參數 fsa,並根據要解析的內核符號初始化這個參數。接着以&fsa和find_symbol_in_section這個函數指針爲參數調用each_symbol_section()函數。

/*
 * Run @fn over every exported-symbol table: first the tables built into
 * the kernel image (owner NULL), then the tables of each module on the
 * modules list.  @data is handed to @fn unchanged.
 *
 * Returns true as soon as @fn returns true for some table, false if it
 * never does.
 */
bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
				    struct module *owner,
				    void *data),
			 void *data)
{
	/* Tables delimited by the __start_ / __stop_ symbols of the image. */
	static const struct symsearch kernel_tables[] = {
		{ __start___ksymtab, __stop___ksymtab, __start___kcrctab,
		  NOT_GPL_ONLY, false },
		{ __start___ksymtab_gpl, __stop___ksymtab_gpl,
		  __start___kcrctab_gpl,
		  GPL_ONLY, false },
		{ __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future,
		  __start___kcrctab_gpl_future,
		  WILL_BE_GPL_ONLY, false },
#ifdef CONFIG_UNUSED_SYMBOLS
		{ __start___ksymtab_unused, __stop___ksymtab_unused,
		  __start___kcrctab_unused,
		  NOT_GPL_ONLY, true },
		{ __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl,
		  __start___kcrctab_unused_gpl,
		  GPL_ONLY, true },
#endif
	};
	struct module *mod;

	if (each_symbol_in_section(kernel_tables, ARRAY_SIZE(kernel_tables),
				   NULL, fn, data))
		return true;

	list_for_each_entry_rcu(mod, &modules, list) {
		/* Same set of tables, taken from this module's own exports. */
		struct symsearch module_tables[] = {
			{ mod->syms, mod->syms + mod->num_syms, mod->crcs,
			  NOT_GPL_ONLY, false },
			{ mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms,
			  mod->gpl_crcs,
			  GPL_ONLY, false },
			{ mod->gpl_future_syms,
			  mod->gpl_future_syms + mod->num_gpl_future_syms,
			  mod->gpl_future_crcs,
			  WILL_BE_GPL_ONLY, false },
#ifdef CONFIG_UNUSED_SYMBOLS
			{ mod->unused_syms,
			  mod->unused_syms + mod->num_unused_syms,
			  mod->unused_crcs,
			  NOT_GPL_ONLY, true },
			{ mod->unused_gpl_syms,
			  mod->unused_gpl_syms + mod->num_unused_gpl_syms,
			  mod->unused_gpl_crcs,
			  GPL_ONLY, true },
#endif
		};

		if (each_symbol_in_section(module_tables,
					   ARRAY_SIZE(module_tables),
					   mod, fn, data))
			return true;
	}

	return false;
}

這個函數裏有個static數組,數組元素是struct symsearch結構體,這個結構體是用來描述一個內核符號表的,start 和 stop 分別描述表的頭尾,crcs暫時不管,licence描述這張表裏的符號的許可證類型(例如GPL_ONLY表示只允許GPL模塊使用),unused表示這張表裏的符號是否被標記爲unused。

/* Describes one table of exported symbols to be searched. */
struct symsearch {
	const struct kernel_symbol *start;	/* first entry of the table */
	const struct kernel_symbol *stop;	/* one past the last entry */
	const unsigned long *crcs;		/* CRC table that goes with it */
	/* Licence class of the symbols in this table. */
	enum {
		NOT_GPL_ONLY,
		GPL_ONLY,
		WILL_BE_GPL_ONLY,
	} licence;
	bool unused;				/* table holds symbols marked unused */
};

可以看出內核定義了3~5個內核符號表(後兩個只有在定義了CONFIG_UNUSED_SYMBOLS時才存在)。這些描述符號表的結構體的成員都已經賦了值,像__start___ksymtab,__stop___ksymtab等等,這些值定義在arch/arm/kernel/vmlinux.lds中,也就是定義在鏈接腳本中。前面說過,內核符號通過EXPORT_SYMBOL()宏導出到一個特殊段,在鏈接內核的時候,鏈接腳本就將這些段合併爲幾個內核符號表段,並定義了幾個標誌開始和結束地址的符號,如__start___ksymtab,__stop___ksymtab就標識了__ksymtab符號表段的開始地址和結束地址。

	static const struct symsearch arr[] = {
		{ __start___ksymtab, __stop___ksymtab, __start___kcrctab,
		  NOT_GPL_ONLY, false },
		{ __start___ksymtab_gpl, __stop___ksymtab_gpl,
		  __start___kcrctab_gpl,
		  GPL_ONLY, false },
		{ __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future,
		  __start___kcrctab_gpl_future,
		  WILL_BE_GPL_ONLY, false },
#ifdef CONFIG_UNUSED_SYMBOLS
		{ __start___ksymtab_unused, __stop___ksymtab_unused,
		  __start___kcrctab_unused,
		  NOT_GPL_ONLY, true },
		{ __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl,
		  __start___kcrctab_unused_gpl,
		  GPL_ONLY, true },
#endif
	};

接着回到each_symbol_section()函數,函數調用each_symbol_in_section(),參數是這個static數組指針和數組的size,owner爲NULL,*fn就是 find_symbol_in_section,data其實是&fsa這個指針。each_symbol_in_section()這個函數做的事很簡單,就是遍歷arr數組中的每個內核符號表,調用find_symbol_in_section函數在每個內核符號表裏搜索&fsa指定的符號。

/*
 * Call @fn on each of the @arrsize tables in @arr, in order, passing
 * @owner and @data through.  Stops and returns true at the first table for
 * which @fn returns true; returns false if none does.
 */
static bool each_symbol_in_section(const struct symsearch *arr,
				   unsigned int arrsize,
				   struct module *owner,
				   bool (*fn)(const struct symsearch *syms,
					      struct module *owner,
					      void *data),
				   void *data)
{
	const struct symsearch *table = arr;
	const struct symsearch *end = arr + arrsize;

	while (table != end) {
		if (fn(table, owner, data))
			return true;
		table++;
	}

	return false;
}

find_symbol_in_section函數如下,先將data指針轉化爲struct find_symbol_arg結構體指針,再調用bsearch在syms描述的內核符號表裏搜索fsa指定的內核符號,我們假設搜索到了這個符號,搜索的結果就存放在struct kernel_symbol這個結構體中。

/*
 * Search one exported-symbol table for the name requested in data.
 *
 * syms:  the table to search (entries from syms->start up to syms->stop).
 * owner: passed through to check_symbol().
 * data:  really a struct find_symbol_arg *; fsa->name is the lookup key.
 *
 * Returns true only when the name is found by bsearch() and check_symbol()
 * accepts the match; false otherwise.
 */
static bool find_symbol_in_section(const struct symsearch *syms,
				   struct module *owner,
				   void *data)
{
	struct find_symbol_arg *fsa = data;
	struct kernel_symbol *sym;

	sym = bsearch(fsa->name, syms->start, syms->stop - syms->start,
			sizeof(struct kernel_symbol), cmp_name);
	if (sym == NULL)
		return false;

	/*
	 * Debug trace of the hit. kernel_symbol.value is an unsigned long
	 * in this kernel version, so it is printed with %lx (not %x).
	 */
	printk(KERN_ERR "name: %s, sym value: %lx\n", fsa->name, sym->value);

	/* sym - syms->start is the entry's index within this table. */
	return check_symbol(syms, owner, sym - syms->start, data);
}

bsearch函數就是用“二分法”查表(所以要求符號表是按符號名排好序的),不多說了。

/*
 * Binary search over an array of num elements, each size bytes wide,
 * starting at base.
 *
 * cmp(key, elt) must return a negative value when key sorts before elt,
 * a positive value when it sorts after, and zero on a match.
 *
 * Returns a pointer to a matching element, or NULL when none matches
 * (including when num is 0).
 */
void *bsearch(const void *key, const void *base, size_t num, size_t size,
	      int (*cmp)(const void *key, const void *elt))
{
	const char *table = base;
	size_t lo = 0;
	size_t hi = num;

	/* Invariant: any match lies in the half-open range [lo, hi). */
	while (lo < hi) {
		size_t probe = lo + (hi - lo) / 2;
		const char *elt = table + probe * size;
		int order = cmp(key, elt);

		if (order == 0)
			return (void *)elt;

		if (order < 0)
			hi = probe;
		else
			lo = probe + 1;
	}

	return NULL;
}
bsearch返回後,調用check_symbol()函數,這個函數在返回前將搜索到的內核導出符號的struct kernel_symbol結構指針賦值給fsa->sym。

/*
 * Decide whether the symbol at index symnum of table syms may be used by
 * the requester described in data (a struct find_symbol_arg *), and if so
 * record the match in it.
 *
 * Returns false, leaving the request untouched, when the requester is not
 * GPL-ok and the table is GPL_ONLY. Otherwise emits any applicable
 * warnings, fills in owner, crc and sym, and returns true.
 */
static bool check_symbol(const struct symsearch *syms,
				 struct module *owner,
				 unsigned int symnum, void *data)
{
	struct find_symbol_arg *req = data;
	const bool gpl_allowed = req->gplok;

	/* GPL-only tables are off limits to non-GPL requesters. */
	if (!gpl_allowed && syms->licence == GPL_ONLY)
		return false;

	/* Soon-to-be GPL-only symbols are still handed out, with a warning. */
	if (!gpl_allowed && syms->licence == WILL_BE_GPL_ONLY && req->warn) {
		printk(KERN_WARNING "Symbol %s is being used "
		       "by a non-GPL module, which will not "
		       "be allowed in the future\n", req->name);
		printk(KERN_WARNING "Please see the file "
		       "Documentation/feature-removal-schedule.txt "
		       "in the kernel source tree for more details.\n");
	}

#ifdef CONFIG_UNUSED_SYMBOLS
	if (syms->unused && req->warn) {
		printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
		       "however this module is using it.\n", req->name);
		printk(KERN_WARNING
		       "This symbol will go away in the future.\n");
		printk(KERN_WARNING
		       "Please evalute if this is the right api to use and if "
		       "it really is, submit a report the linux kernel "
		       "mailinglist together with submitting your code for "
		       "inclusion.\n");
	}
#endif

	/* Hand the result back through the request structure. */
	req->sym = &syms->start[symnum];
	req->crc = symversion(syms->crcs, symnum);
	req->owner = owner;

	return true;
}

check_symbol()函數返回後,find_symbol_in_section函數也返回true,並向上層層返回到find_symbol(),find_symbol()函數將fsa->sym返回至resolve_symbol()函數,接着resolve_symbol()函數調用ref_module(),第一個參數a是現在正在加載的模塊,第二個參數b是a模塊用到的模塊(也就是符號所屬的模塊owner)。因爲這個符號是在內核自身的符號表裏找到的,owner爲NULL,所以此處b爲NULL。

/*
 * Record that module a uses module b.
 *
 * Nothing is done (and 0 is returned) when b is NULL or already_uses()
 * reports the dependency as present. Otherwise a reference on b is taken
 * with strong_try_module_get() and the usage is registered with
 * add_module_usage(); if registration fails the reference is dropped again.
 *
 * Returns 0 on success, or the error code of the step that failed.
 */
int ref_module(struct module *a, struct module *b)
{
	int rc;

	if (b == NULL)
		return 0;
	if (already_uses(a, b))
		return 0;

	/* If module isn't available, we fail. */
	rc = strong_try_module_get(b);
	if (rc)
		return rc;

	rc = add_module_usage(a, b);
	if (rc)
		module_put(b);	/* undo the reference taken above */

	return rc;
}

因爲這裏b = NULL,所以這裏直接返回0;

返回到resolve_symbol(),resolve_symbol()最後將描述內核導出符號的struct kernel_symbol結構體的指針sym返回。

現在我們已經返回到resolve_symbol_wait()函數了,接下來調用wait_event_interruptible_timeout(),因爲我們剛纔已經得到了內核導出符號,那麼現在就不用睡眠等待,否則會睡眠。最後resolve_symbol_wait()返回resolve_symbol()剛剛返回的sym。

向上返回到simplify_symbols(),前面說過,將這個struct kernel_symbol結構體的value成員直接賦值給st_value。然後返回。

這樣繞了一大圈,simplify_symbols()終於返回了。simplify_symbols()返回後,在init空間的符號表的每個符號表項中,st_value域存放的就是符號的絕對虛擬地址值了。

接下來load_module()調用apply_relocations()進行真正的重定位工作。

/*
 * Apply every relocation section of the module image described by info.
 *
 * Sections are visited from index 1 upwards. A section is skipped when its
 * sh_info does not name a valid section, or when the section it names is
 * not SHF_ALLOC. SHT_REL sections go to apply_relocate(), SHT_RELA
 * sections to apply_relocate_add(); other section types are ignored.
 *
 * Returns 0 on success, or the first negative error reported by an
 * arch-specific relocation routine (processing stops there).
 */
static int apply_relocations(struct module *mod, const struct load_info *info)
{
	unsigned int i;
	int err = 0;

	/* Now do relocations. */
	for (i = 1; i < info->hdr->e_shnum; i++) {
		unsigned int infosec = info->sechdrs[i].sh_info;

		/* Not a valid relocation section? */
		if (infosec >= info->hdr->e_shnum)
			continue;

		/* Don't bother with non-allocated sections */
		if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC))
			continue;

		if (info->sechdrs[i].sh_type == SHT_REL) {
			printk(KERN_ERR "relocate section : %s,  type: %s\n",
			       info->secstrings + info->sechdrs[i].sh_name,
			       "SHT_REL");
			err = apply_relocate(info->sechdrs, info->strtab,
					     info->index.sym, i, mod);
			/*
			 * Debug trace of the init pointer after this section
			 * was relocated. It is a pointer, so print it with %p
			 * as sys_init_module() does, not with %x.
			 */
			printk(KERN_ERR "mod init: %p\n", mod->init);
		} else if (info->sechdrs[i].sh_type == SHT_RELA) {
			printk(KERN_ERR "relocate section : %s,  type: %s\n",
			       info->secstrings + info->sechdrs[i].sh_name,
			       "SHT_RELA");
			err = apply_relocate_add(info->sechdrs, info->strtab,
						 info->index.sym, i, mod);
		}
		if (err < 0)
			break;
	}
	return err;
}

for循環遍歷臨時內核空間的各個段,篩選出其中有效的重定位表段,對重定位表所作用的段進行重定位。重定位段的類型主要有SHT_REL和SHT_RELA,以SHT_REL爲例,當重定位表段的類型是SHT_REL時,調用apply_relocate()進行重定位。apply_relocate()位於arch/arm/kernel/module.c中。


/*
 * Apply all Elf32_Rel entries of one relocation section to the section it
 * targets.
 *
 * sechdrs:  section header table; the sh_addr fields are used directly as
 *           addresses.
 * strtab:   string table used to look up symbol names for error messages.
 * symindex: index in sechdrs of the symbol table section.
 * relindex: index in sechdrs of the relocation section to process; its
 *           sh_info selects the section being patched.
 * module:   only its name is used, in error messages.
 *
 * Returns 0 on success, or -ENOEXEC for a bad symbol index, an
 * out-of-bounds relocation offset, an out-of-range branch target or an
 * unknown relocation type.
 */
int
apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
	       unsigned int relindex, struct module *module)
{
	Elf32_Shdr *symsec = sechdrs + symindex;
	Elf32_Shdr *relsec = sechdrs + relindex;
	Elf32_Shdr *dstsec = sechdrs + relsec->sh_info;
	Elf32_Rel *rel = (void *)relsec->sh_addr;
	unsigned int i;

	for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rel); i++, rel++) {
		unsigned long loc;
		Elf32_Sym *sym;
		const char *symname;
		s32 offset;
#ifdef CONFIG_THUMB2_KERNEL
		u32 upper, lower, sign, j1, j2;
#endif

		/* Here offset temporarily holds the symbol table index. */
		offset = ELF32_R_SYM(rel->r_info);
		/*
		 * NOTE(review): the '>' comparison lets an index equal to the
		 * number of symbols through - confirm whether '>=' is intended.
		 */
		if (offset < 0 || offset > (symsec->sh_size / sizeof(Elf32_Sym))) {
			pr_err("%s: section %u reloc %u: bad relocation sym offset\n",
				module->name, relindex, i);
			return -ENOEXEC;
		}

		sym = ((Elf32_Sym *)symsec->sh_addr) + offset;
		symname = strtab + sym->st_name;
		/* The 32-bit word being patched must fit inside dstsec. */
		if (rel->r_offset < 0 || rel->r_offset > dstsec->sh_size - sizeof(u32)) {
			pr_err("%s: section %u reloc %u sym '%s': out of bounds relocation, offset %d size %u\n",
			       module->name, relindex, i, symname,
			       rel->r_offset, dstsec->sh_size);
			return -ENOEXEC;
		}

		/* Address of the location to patch. */
		loc = dstsec->sh_addr + rel->r_offset;

		switch (ELF32_R_TYPE(rel->r_info)) {
		case R_ARM_NONE:
			/* ignore */
			break;

		case R_ARM_ABS32:
			/* Add the symbol value to the word already stored at loc. */
			*(u32 *)loc += sym->st_value;
			break;

		case R_ARM_PC24:
		case R_ARM_CALL:
		case R_ARM_JUMP24:
			/*
			 * Take the low 24 bits of the instruction, scale them
			 * by 4 and sign-extend from bit 25.
			 */
			offset = (*(u32 *)loc & 0x00ffffff) << 2;
			if (offset & 0x02000000)
				offset -= 0x04000000;

			/* Make the target relative to the patched location. */
			offset += sym->st_value - loc;
			/*
			 * Reject offsets that are not a multiple of 4 or that
			 * fall outside (-0x02000000, 0x02000000).
			 */
			if (offset & 3 ||
			    offset <= (s32)0xfe000000 ||
			    offset >= (s32)0x02000000) {
				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
				       module->name, relindex, i, symname,
				       ELF32_R_TYPE(rel->r_info), loc,
				       sym->st_value);
				return -ENOEXEC;
			}

			offset >>= 2;

			/* Keep the top byte, replace the 24-bit field. */
			*(u32 *)loc &= 0xff000000;
			*(u32 *)loc |= offset & 0x00ffffff;
			break;

	       case R_ARM_V4BX:
		       /* Preserve Rm and the condition code. Alter
			* other bits to re-code instruction as
			* MOV PC,Rm.
			*/
		       *(u32 *)loc &= 0xf000000f;
		       *(u32 *)loc |= 0x01a0f000;
		       break;

		case R_ARM_PREL31:
			/* Location-relative value, truncated to 31 bits. */
			offset = *(u32 *)loc + sym->st_value - loc;
			*(u32 *)loc = offset & 0x7fffffff;
			break;

		case R_ARM_MOVW_ABS_NC:
		case R_ARM_MOVT_ABS:
			/*
			 * Rebuild the 16-bit immediate from bits [19:16] and
			 * [11:0] of the instruction, then sign-extend it.
			 */
			offset = *(u32 *)loc;
			offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff);
			offset = (offset ^ 0x8000) - 0x8000;

			offset += sym->st_value;
			/* MOVT encodes the upper halfword of the result. */
			if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS)
				offset >>= 16;

			/* Scatter the low 16 bits back into the two fields. */
			*(u32 *)loc &= 0xfff0f000;
			*(u32 *)loc |= ((offset & 0xf000) << 4) |
					(offset & 0x0fff);
			break;

#ifdef CONFIG_THUMB2_KERNEL
		case R_ARM_THM_CALL:
		case R_ARM_THM_JUMP24:
			/* The instruction is read as two consecutive halfwords. */
			upper = *(u16 *)loc;
			lower = *(u16 *)(loc + 2);

			/*
			 * 25 bit signed address range (Thumb-2 BL and B.W
			 * instructions):
			 *   S:I1:I2:imm10:imm11:0
			 * where:
			 *   S     = upper[10]   = offset[24]
			 *   I1    = ~(J1 ^ S)   = offset[23]
			 *   I2    = ~(J2 ^ S)   = offset[22]
			 *   imm10 = upper[9:0]  = offset[21:12]
			 *   imm11 = lower[10:0] = offset[11:1]
			 *   J1    = lower[13]
			 *   J2    = lower[11]
			 */
			sign = (upper >> 10) & 1;
			j1 = (lower >> 13) & 1;
			j2 = (lower >> 11) & 1;
			offset = (sign << 24) | ((~(j1 ^ sign) & 1) << 23) |
				((~(j2 ^ sign) & 1) << 22) |
				((upper & 0x03ff) << 12) |
				((lower & 0x07ff) << 1);
			/* Sign-extend from bit 24. */
			if (offset & 0x01000000)
				offset -= 0x02000000;
			offset += sym->st_value - loc;

			/*
			 * For function symbols, only Thumb addresses are
			 * allowed (no interworking).
			 *
			 * For non-function symbols, the destination
			 * has no specific ARM/Thumb disposition, so
			 * the branch is resolved under the assumption
			 * that interworking is not required.
			 */
			if ((ELF32_ST_TYPE(sym->st_info) == STT_FUNC &&
				!(offset & 1)) ||
			    offset <= (s32)0xff000000 ||
			    offset >= (s32)0x01000000) {
				pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
				       module->name, relindex, i, symname,
				       ELF32_R_TYPE(rel->r_info), loc,
				       sym->st_value);
				return -ENOEXEC;
			}

			/* Re-encode S, J1, J2, imm10 and imm11 from offset. */
			sign = (offset >> 24) & 1;
			j1 = sign ^ (~(offset >> 23) & 1);
			j2 = sign ^ (~(offset >> 22) & 1);
			*(u16 *)loc = (u16)((upper & 0xf800) | (sign << 10) |
					    ((offset >> 12) & 0x03ff));
			*(u16 *)(loc + 2) = (u16)((lower & 0xd000) |
						  (j1 << 13) | (j2 << 11) |
						  ((offset >> 1) & 0x07ff));
			break;

		case R_ARM_THM_MOVW_ABS_NC:
		case R_ARM_THM_MOVT_ABS:
			upper = *(u16 *)loc;
			lower = *(u16 *)(loc + 2);

			/*
			 * MOVT/MOVW instructions encoding in Thumb-2:
			 *
			 * i	= upper[10]
			 * imm4	= upper[3:0]
			 * imm3	= lower[14:12]
			 * imm8	= lower[7:0]
			 *
			 * imm16 = imm4:i:imm3:imm8
			 */
			offset = ((upper & 0x000f) << 12) |
				((upper & 0x0400) << 1) |
				((lower & 0x7000) >> 4) | (lower & 0x00ff);
			/* Sign-extend the 16-bit immediate. */
			offset = (offset ^ 0x8000) - 0x8000;
			offset += sym->st_value;

			/* MOVT encodes the upper halfword of the result. */
			if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS)
				offset >>= 16;

			*(u16 *)loc = (u16)((upper & 0xfbf0) |
					    ((offset & 0xf000) >> 12) |
					    ((offset & 0x0800) >> 1));
			*(u16 *)(loc + 2) = (u16)((lower & 0x8f00) |
						  ((offset & 0x0700) << 4) |
						  (offset & 0x00ff));
			break;
#endif

		default:
			printk(KERN_ERR "%s: unknown relocation: %u\n",
			       module->name, ELF32_R_TYPE(rel->r_info));
			return -ENOEXEC;
		}
	}
	return 0;
}

這個函數所做的大概工作就是遍歷重定位表項,對每個重定位項,找到重定位入口地址,再根據符號表得到符號的絕對虛擬地址,再根據重定位入口的類型,進行對應的地址修正。總之,重定位完成後,代碼中對符號的引用,都將會被修正爲符號在內核的正確地址。

這裏有一點要注意下,前面不是提到.gnu.linkonce.this_module段嗎?這個段也有一個自己的重定位表,叫.rel.gnu.linkonce.this_module,這個重定位表裏只有兩個重定位表項。還記得前面提到的struct module結構體嗎?(它的定義在下面給出。)module結構內有兩個成員,init和exit,這兩個成員存放着模塊的__init函數和__exit函數的指針,.rel.gnu.linkonce.this_module重定位表中的兩個重定位項就分別對應着.gnu.linkonce.this_module段中的這兩個指針!

也就是說,這兩個指針的值也會被apply_relocate()函數重定位,重定位這兩個指針有什麼用呢?因爲後面將會用這兩個指針,調用模塊的__init函數和__exit函數。

/*
 * In-kernel descriptor of a loadable module. As described in the
 * surrounding article, the .gnu.linkonce.this_module section of a .ko file
 * holds one instance of this structure.
 */
struct module
{
	/* Lifecycle state, e.g. set to MODULE_STATE_GOING when init fails. */
	enum module_state state;

	/* Member of list of modules */
	struct list_head list;

	/* Unique handle for this module */
	char name[MODULE_NAME_LEN];

	/* Sysfs stuff. */
	struct module_kobject mkobj;
	struct module_attribute *modinfo_attrs;
	const char *version;
	const char *srcversion;
	struct kobject *holders_dir;

	/* Exported symbols */
	const struct kernel_symbol *syms;
	const unsigned long *crcs;
	unsigned int num_syms;

	/* Kernel parameters. */
	struct kernel_param *kp;
	unsigned int num_kp;

	/* GPL-only exported symbols. */
	unsigned int num_gpl_syms;
	const struct kernel_symbol *gpl_syms;
	const unsigned long *gpl_crcs;

#ifdef CONFIG_UNUSED_SYMBOLS
	/* unused exported symbols. */
	const struct kernel_symbol *unused_syms;
	const unsigned long *unused_crcs;
	unsigned int num_unused_syms;

	/* GPL-only, unused exported symbols. */
	unsigned int num_unused_gpl_syms;
	const struct kernel_symbol *unused_gpl_syms;
	const unsigned long *unused_gpl_crcs;
#endif

	/* symbols that will be GPL-only in the near future. */
	const struct kernel_symbol *gpl_future_syms;
	const unsigned long *gpl_future_crcs;
	unsigned int num_gpl_future_syms;

	/* Exception table */
	unsigned int num_exentries;
	struct exception_table_entry *extable;

	/*
	 * Startup function. sys_init_module() passes this pointer to
	 * do_one_initcall() when it is non-NULL.
	 */
	int (*init)(void);

	/* If this is non-NULL, vfree after init() returns */
	void *module_init;

	/* Here is the actual code + data, vfree'd on unload. */
	void *module_core;

	/* Here are the sizes of the init and core sections */
	unsigned int init_size, core_size;

	/* The size of the executable code in each section.  */
	unsigned int init_text_size, core_text_size;

	/* Size of RO sections of the module (text+rodata) */
	unsigned int init_ro_size, core_ro_size;

	/* Arch-specific module values */
	struct mod_arch_specific arch;

	unsigned int taints;	/* same bits as kernel:tainted */

#ifdef CONFIG_GENERIC_BUG
	/* Support for BUG */
	unsigned num_bugs;
	struct list_head bug_list;
	struct bug_entry *bug_table;
#endif

#ifdef CONFIG_KALLSYMS
	/*
	 * We keep the symbol and string tables for kallsyms.
	 * The core_* fields below are temporary, loader-only (they
	 * could really be discarded after module init).
	 */
	Elf_Sym *symtab, *core_symtab;
	unsigned int num_symtab, core_num_syms;
	char *strtab, *core_strtab;

	/* Section attributes */
	struct module_sect_attrs *sect_attrs;

	/* Notes attributes */
	struct module_notes_attrs *notes_attrs;
#endif

	/* The command line arguments (may be mangled).  People like
	   keeping pointers to this stuff */
	char *args;

#ifdef CONFIG_SMP
	/* Per-cpu data. */
	void __percpu *percpu;
	unsigned int percpu_size;
#endif

	/* Optional tables below; each pointer is paired with an element count. */
#ifdef CONFIG_TRACEPOINTS
	unsigned int num_tracepoints;
	struct tracepoint * const *tracepoints_ptrs;
#endif
#ifdef HAVE_JUMP_LABEL
	struct jump_entry *jump_entries;
	unsigned int num_jump_entries;
#endif
#ifdef CONFIG_TRACING
	unsigned int num_trace_bprintk_fmt;
	const char **trace_bprintk_fmt_start;
#endif
#ifdef CONFIG_EVENT_TRACING
	struct ftrace_event_call **trace_events;
	unsigned int num_trace_events;
#endif
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
	unsigned int num_ftrace_callsites;
	unsigned long *ftrace_callsites;
#endif

#ifdef CONFIG_MODULE_UNLOAD
	/* What modules depend on me? */
	struct list_head source_list;
	/* What modules do I depend on? */
	struct list_head target_list;

	/* Who is waiting for us to be unloaded */
	struct task_struct *waiter;

	/* Destruction function. */
	void (*exit)(void);

	/* Per-cpu reference counter; presumably what module_put() operates on - verify. */
	struct module_ref __percpu *refptr;
#endif

#ifdef CONFIG_CONSTRUCTORS
	/* Constructor functions. */
	ctor_fn_t *ctors;
	unsigned int num_ctors;
#endif
};

繼續一路返回到load_module()函數,load_module()後面的代碼不看,一路返回到sys_init_module(),在sys_init_module()中,後面會調用do_one_initcall(),參數就是被重定位過的init指針。

	/*
	 * Excerpt from sys_init_module(): if the module has an init function,
	 * hand it to do_one_initcall() and keep the result in ret.
	 */
	if (mod->init != NULL)
		ret = do_one_initcall(mod->init);

這個do_one_initcall()函數會調用這個init指針所指向的函數,至此,我們模塊的__init函數就被調用了。

(__init函數是指用__init前綴定義的函數,__exit函數是指用__exit前綴定義的函數,前面提過。)






發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章