閱讀本文需要先了解 ELF文件格式 的相關知識,以下引用的kernel源代碼,都是基於linux kernel源代碼版本:3.4。
linux內核模塊分兩種形態,一是靜態編譯進內核的模塊,二是用insmod命令動態加載的模塊,也就是後綴名爲KO的文件。這裏主要討論linux內核動態加載模塊的過程,也就是KO文件被動態加載進內核,並運行的過程。
後綴爲KO的文件其實是一種ELF格式文件,很類似於ELF目標文件(.o文件),但是又與ELF目標文件有一點小區別。使用readelf工具可以看到,KO文件裏有一個叫.gnu.linkonce.this_module的段,而普通目標文件是沒有這個段的。這個段的內容其實是一個struct module結構體(段的地址就等於module結構體的首地址),記錄了KO模塊的一些信息,這個結構體在linux kernel源代碼裏也有定義(include/linux/module.h),因爲內核在加載模塊時要用到這個結構體。
當linux順利啓動,進入shell的時候,就可以輸入insmod命令,加載我們自己的內核模塊啦。insmod命令封裝了一個叫 sys_init_module 的系統調用,sys_init_module源碼如下:
/*
 * init_module system call: load the module image found at umod (len bytes)
 * with the argument string uargs, run its init routine and, on success,
 * free the module's init region.
 *
 * Returns 0 on success; -EPERM when the caller lacks CAP_SYS_MODULE or
 * modules_disabled is set; the error encoded in load_module()'s return
 * value; or the negative value returned by the module's init routine.
 */
SYSCALL_DEFINE3(init_module, void __user *, umod,
unsigned long, len, const char __user *, uargs)
{
struct module *mod;
int ret = 0;
/* Must have permission */
if (!capable(CAP_SYS_MODULE) || modules_disabled)
return -EPERM;
/* Do all the hard work */
mod = load_module(umod, len, uargs);
if (IS_ERR(mod))
return PTR_ERR(mod);
/* Tell registered listeners that the module is in the COMING state. */
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_COMING, mod);
/* Set RO and NX regions for core */
set_section_ro_nx(mod->module_core,
mod->core_text_size,
mod->core_ro_size,
mod->core_size);
/* Set RO and NX regions for init */
set_section_ro_nx(mod->module_init,
mod->init_text_size,
mod->init_ro_size,
mod->init_size);
do_mod_ctors(mod);
/* Start the module */
/* Debug trace: print the address of the module's init routine. */
printk(KERN_ERR "mod->init = %p\n", mod->init);
if (mod->init != NULL)
ret = do_one_initcall(mod->init);
if (ret < 0) {
/* Init routine failed: abort. Try to protect us from
buggy refcounters. */
mod->state = MODULE_STATE_GOING;
synchronize_sched();
module_put(mod);
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_GOING, mod);
free_module(mod);
wake_up(&module_wq);
return ret;
}
/* A positive return value is only reported; loading continues. */
if (ret > 0) {
printk(KERN_WARNING
"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
"%s: loading module anyway...\n",
__func__, mod->name, ret,
__func__);
dump_stack();
}
/* Now it's a first class citizen! Wake up anyone waiting for it. */
mod->state = MODULE_STATE_LIVE;
wake_up(&module_wq);
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_LIVE, mod);
/* We need to finish all async code before the module init sequence is done */
async_synchronize_full();
mutex_lock(&module_mutex);
/* Drop initial reference. */
module_put(mod);
trim_init_extable(mod);
#ifdef CONFIG_KALLSYMS
/* Switch to the core copies of the symbol and string tables before the
 * init region is freed below. */
mod->num_symtab = mod->core_num_syms;
mod->symtab = mod->core_symtab;
mod->strtab = mod->core_strtab;
#endif
/* Release the init region and clear every field that describes it. */
unset_module_init_ro_nx(mod);
module_free(mod, mod->module_init);
mod->module_init = NULL;
mod->init_size = 0;
mod->init_ro_size = 0;
mod->init_text_size = 0;
mutex_unlock(&module_mutex);
return 0;
}
這是一個用宏定義的函數,展開後函數名就是sys_init_module,參數有3個,umod是ko文件在用戶空間的首地址,len是ko文件的大小,uargs是用戶空間的參數指針。進入這個函數後先check一下permission,然後調用load_module(),加載ko模塊的工作主要就是在load_module()這個函數中完成的。
/*
 * Load a module image from user space and prepare it for execution.
 *
 * @umod:  user-space address of the module (.ko) image
 * @len:   size of the image in bytes
 * @uargs: user-space module parameter string
 *
 * Copies the image into the kernel, lays out and allocates its final
 * memory, resolves symbols, applies relocations, links the module into the
 * global module list, parses its arguments and sets up its sysfs entries.
 *
 * Returns the module on success or an ERR_PTR()-encoded error. On failure
 * the cleanup labels at the end undo the completed steps in reverse order.
 */
static struct module *load_module(void __user *umod,
				  unsigned long len,
				  const char __user *uargs)
{
	struct load_info info = { NULL, };
	struct module *mod;
	long err;

	pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n",
		 umod, len, uargs);
	/* Debug trace; pointers are printed with %p. */
	printk(KERN_ERR "load_module: umod=%p, len=%lu, uargs=%p\n", umod, len, uargs);

	/* Copy in the blobs from userspace, check they are vaguely sane. */
	err = copy_and_check(&info, umod, len, uargs);
	if (err)
		return ERR_PTR(err);

	/* Figure out module layout, and allocate all the memory. */
	mod = layout_and_allocate(&info);
	if (IS_ERR(mod)) {
		err = PTR_ERR(mod);
		goto free_copy;
	}
	/*
	 * Debug trace. This must come after the IS_ERR() check: on failure
	 * mod is an encoded error value, not a dereferenceable pointer.
	 */
	printk(KERN_ERR "..mod->init: %p\n", mod->init);

	/* Now module is in final location, initialize linked lists, etc. */
	err = module_unload_init(mod);
	if (err)
		goto free_module;

	/* Now we've got everything in the final locations, we can
	 * find optional sections. */
	find_module_sections(mod, &info);

	err = check_module_license_and_versions(mod);
	if (err)
		goto free_unload;

	/* Set up MODINFO_ATTR fields */
	setup_modinfo(mod, &info);

	/* Fix up syms, so that st_value is a pointer to location. */
	printk(KERN_ERR "---------------------------------------\n");
	err = simplify_symbols(mod, &info);
	if (err < 0)
		goto free_modinfo;

	/* Debug traces: show mod->init before and after relocation. */
	printk(KERN_ERR "11..mod->init: %p\n", mod->init);
	err = apply_relocations(mod, &info);
	printk(KERN_ERR "22..mod->init: %p\n", mod->init);
	if (err < 0)
		goto free_modinfo;

	err = post_relocation(mod, &info);
	if (err < 0)
		goto free_modinfo;

	flush_module_icache(mod);

	/* Now copy in args */
	mod->args = strndup_user(uargs, ~0UL >> 1);
	if (IS_ERR(mod->args)) {
		err = PTR_ERR(mod->args);
		goto free_arch_cleanup;
	}

	/* Mark state as coming so strong_try_module_get() ignores us. */
	mod->state = MODULE_STATE_COMING;

	/* Now sew it into the lists so we can get lockdep and oops
	 * info during argument parsing.  No one should access us, since
	 * strong_try_module_get() will fail.
	 * lockdep/oops can run asynchronous, so use the RCU list insertion
	 * function to insert in a way safe to concurrent readers.
	 * The mutex protects against concurrent writers.
	 */
	mutex_lock(&module_mutex);
	if (find_module(mod->name)) {
		err = -EEXIST;
		goto unlock;
	}

	/* This has to be done once we're sure module name is unique. */
	dynamic_debug_setup(info.debug, info.num_debug);

	/* Find duplicate symbols */
	err = verify_export_symbols(mod);
	if (err < 0)
		goto ddebug;

	module_bug_finalize(info.hdr, info.sechdrs, mod);
	list_add_rcu(&mod->list, &modules);
	mutex_unlock(&module_mutex);

	/* Module is ready to execute: parsing args may do that. */
	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
			 -32768, 32767, NULL);
	if (err < 0)
		goto unlink;

	/* Link in to syfs. */
	err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp);
	if (err < 0)
		goto unlink;

	/* Get rid of temporary copy. */
	free_copy(&info);

	/* Done! */
	trace_module_load(mod);
	return mod;

 unlink:
	mutex_lock(&module_mutex);
	/* Unlink carefully: kallsyms could be walking list. */
	list_del_rcu(&mod->list);
	module_bug_cleanup(mod);
 ddebug:
	dynamic_debug_remove(info.debug);
 unlock:
	mutex_unlock(&module_mutex);
	synchronize_sched();
	kfree(mod->args);
 free_arch_cleanup:
	module_arch_cleanup(mod);
 free_modinfo:
	free_modinfo(mod);
 free_unload:
	module_unload_free(mod);
 free_module:
	module_deallocate(mod, &info);
 free_copy:
	free_copy(&info);
	return ERR_PTR(err);
}
進入load_module()後定義了兩個重要的變量:
struct load_info info = { NULL, };
struct module *mod;
其中info是一個struct load_info結構體,這個結構體主要保存了ELF文件的一些基本信息:
/*
 * Bookkeeping for one module load: where the temporary copy of the ELF
 * image is and where the interesting sections are inside it.
 */
struct load_info {
/* Start of the temporary in-kernel copy of the image (its ELF header). */
Elf_Ehdr *hdr;
/* Length of the image in bytes. */
unsigned long len;
/* Section header table: hdr + e_shoff. */
Elf_Shdr *sechdrs;
/* Section-name string table, and the string table linked from the symbol table. */
char *secstrings, *strtab;
/* Offsets in the core region reserved for the core symbols and their strings. */
unsigned long symoffs, stroffs;
/* Passed to dynamic_debug_setup()/dynamic_debug_remove(); presumably the
 * module's dynamic-debug descriptors and their count - filled in outside
 * the code shown here. */
struct _ddebug *debug;
unsigned int num_debug;
/* Section-table indices: symbol table, its string table,
 * .gnu.linkonce.this_module, __versions, .modinfo and the per-cpu section. */
struct {
unsigned int sym, str, mod, vers, info, pcpu;
} index;
};
hdr是ELF文件頭的指針,len是文件長度,sechdrs是段表指針,secstrings 和 strtab 分別是段表字符串表和字符串表的首地址。index結構體裏保存的是一些段在段表裏的索引號,看到有個mod段了吧,這個mod其實就是上面提到過的.gnu.linkonce.this_module段在段表中的下標。
struct module這個結構體的內容和.gnu.linkonce.this_module段的內容是一一對應的,定義有點複雜,用到的時候再看。
接着load_module()調用copy_and_check(),copy_and_check函數主要是vmalloc一塊臨時的內核空間,將用戶空間的ko文件整個複製進去,然後再檢查一下文件是否爲ELF類型、文件長度有沒有超過段表尾端等等。最後將ELF文件的一些信息賦值給info。
/*
 * Copy the module image from user space into a temporary vmalloc() buffer
 * and run basic ELF sanity checks on it.
 *
 * @info:  receives the buffer (info->hdr) and its length (info->len)
 * @umod:  user-space address of the image
 * @len:   size of the image in bytes
 * @uargs: unused here
 *
 * Returns 0 on success, -ENOEXEC if the image is too short or is not a
 * relocatable ELF object for this architecture, -ENOMEM if the buffer
 * cannot be allocated, or -EFAULT if the user copy fails. The buffer is
 * freed on every error path.
 */
static int copy_and_check(struct load_info *info,
			  const void __user *umod, unsigned long len,
			  const char __user *uargs)
{
	int err;
	Elf_Ehdr *hdr;

	if (len < sizeof(*hdr))
		return -ENOEXEC;

	/* Suck in entire file: we'll want most of it. */
	hdr = vmalloc(len);
	if (hdr == NULL)
		return -ENOMEM;

	if (copy_from_user(hdr, umod, len) != 0) {
		err = -EFAULT;
		goto free_hdr;
	}

	/* Sanity checks against insmoding binaries or wrong arch,
	   weird elf version */
	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
	    || hdr->e_type != ET_REL
	    || !elf_check_arch(hdr)
	    || hdr->e_shentsize != sizeof(Elf_Shdr)) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	/*
	 * The whole section header table must lie inside the image.
	 * Compare against the remaining length instead of summing, so a
	 * huge e_shoff cannot wrap the addition and slip past the check.
	 */
	if (hdr->e_shoff > len
	    || hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	info->hdr = hdr;
	info->len = len;
	/* Debug trace: address of the temporary copy. */
	printk(KERN_ERR "temp space hdr: %p\n", hdr);
	return 0;

free_hdr:
	vfree(hdr);
	return err;
}
接着load_module()調用layout_and_allocate()函數,這個函數主要任務是決定ko文件中哪些段需要爲其分配地址空間,併爲ko文件中的每個需要加載的段計算並分配虛擬地址,也就是運行時地址。前面說過,ko文件是類似於.o目標文件的,所以它的每個段的虛擬地址就像目標文件一樣,都爲0,因此需要鏈接後才能運行。分配每個段的運行時地址就是鏈接的第一步。
/*
 * Decide where every section of the module will live, allocate the final
 * memory and move the sections there.
 *
 * @info: load bookkeeping; info->hdr must already point at the temporary
 *        copy of the image.
 *
 * Returns the struct module located in the final copy of the
 * .gnu.linkonce.this_module section, or an ERR_PTR()-encoded error.
 */
static struct module *layout_and_allocate(struct load_info *info)
{
	/* Module within temporary copy. */
	struct module *mod;
	Elf_Shdr *pcpusec;
	int err;

	mod = setup_load_info(info);
	if (IS_ERR(mod))
		return mod;

	/*
	 * Debug traces. mod->init is a function pointer, so it is printed
	 * with %p; %s would walk the pointed-to memory as if it were a string.
	 */
	printk(KERN_ERR "mod name = %s\n", mod->name);
	printk(KERN_ERR "mod init = %p\n", mod->init);

	err = check_modinfo(mod, info);
	if (err)
		return ERR_PTR(err);

	/* Allow arches to frob section contents and sizes.  */
	err = module_frob_arch_sections(info->hdr, info->sechdrs,
					info->secstrings, mod);
	if (err < 0)
		goto out;

	pcpusec = &info->sechdrs[info->index.pcpu];
	/* sh_size width depends on the ELF class; widen it for printing. */
	printk(KERN_ERR "pcpusec size: %lu\n", (unsigned long)pcpusec->sh_size);
	if (pcpusec->sh_size) {
		/* We have a special allocation for this section. */
		err = percpu_modalloc(mod,
				      pcpusec->sh_size, pcpusec->sh_addralign);
		if (err)
			goto out;
		/* Allocated separately above, so keep it out of the layout. */
		pcpusec->sh_flags &= ~(unsigned long)SHF_ALLOC;
	}

	/* Determine total sizes, and put offsets in sh_entsize.  For now
	   this is done generically; there doesn't appear to be any
	   special cases for the architectures. */
	layout_sections(mod, info);
	layout_symtab(mod, info);

	/* Allocate and move to the final place */
	err = move_module(mod, info);
	if (err)
		goto free_percpu;

	/* Module has been copied to its final place now: return it. */
	mod = (void *)info->sechdrs[info->index.mod].sh_addr;
	kmemleak_load_module(mod, info);
	return mod;

free_percpu:
	percpu_modfree(mod);
out:
	return ERR_PTR(err);
}
layout_and_allocate()函數先調用setup_load_info()對info進一步初始化,接着調用layout_sections()分配各個段在最終虛擬地址上的偏移,.init段會被單獨分配偏移,因爲.init段的虛擬地址是單獨分配的,後面將詳述。然後調用layout_symtab()分配符號表和字符串表在虛擬地址上的偏移。最後調用move_module()將段移動到最終的虛擬地址上去。
setup_load_info()函數繼續用ELF信息去初始化info結構體,並調用rewrite_section_headers()。
/*
 * Fill in the convenience fields of @info from the ELF image and locate
 * the sections the loader needs.
 *
 * Returns a pointer to the struct module inside the temporary copy of the
 * image (the contents of .gnu.linkonce.this_module), or an ERR_PTR()
 * error: the one from rewrite_section_headers(), or -ENOEXEC when the
 * module section or the symbol table is missing or the module struct
 * version check fails.
 */
static struct module *setup_load_info(struct load_info *info)
{
	unsigned int i;
	int err;
	struct module *mod;

	/* Set up the convenience variables */
	info->sechdrs = (void *)info->hdr + info->hdr->e_shoff;
	info->secstrings = (void *)info->hdr
		+ info->sechdrs[info->hdr->e_shstrndx].sh_offset;

	err = rewrite_section_headers(info);
	if (err)
		return ERR_PTR(err);

	/* Find internal symbols and strings. */
	for (i = 1; i < info->hdr->e_shnum; i++) {
		if (info->sechdrs[i].sh_type == SHT_SYMTAB) {
			info->index.sym = i;
			info->index.str = info->sechdrs[i].sh_link;
			info->strtab = (char *)info->hdr
				+ info->sechdrs[info->index.str].sh_offset;
			break;
		}
	}

	info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
	if (!info->index.mod) {
		printk(KERN_WARNING "No module found in object\n");
		return ERR_PTR(-ENOEXEC);
	}
	/* This is temporary: point mod into copy of data. */
	mod = (void *)info->sechdrs[info->index.mod].sh_addr;
	/* Debug trace; both arguments are pointers, hence %p. */
	printk(KERN_ERR "temp mod: %p, &mod->init: %p\n", mod, &(mod->init));

	/* index.sym is still 0 when the loop above found no SHT_SYMTAB section. */
	if (info->index.sym == 0) {
		printk(KERN_WARNING "%s: module has no symbols (stripped?)\n",
		       mod->name);
		return ERR_PTR(-ENOEXEC);
	}

	info->index.pcpu = find_pcpusec(info);

	/* Check module struct version now, before we try to use module. */
	if (!check_modstruct_version(info->sechdrs, info->index.vers, mod))
		return ERR_PTR(-ENOEXEC);

	return mod;
}
rewrite_section_headers()這個函數將每個段的虛擬地址暫時設置爲其在臨時空間中的地址,並將.modinfo段和__versions段的SHF_ALLOC標誌清零,表示不爲這兩個段分配空間。
/*
 * Point every section header's sh_addr at the section's data inside the
 * temporary copy of the image, and clear SHF_ALLOC on the sections that
 * must not be loaded (.modinfo, __versions, and - without
 * CONFIG_MODULE_UNLOAD - sections whose name starts with ".exit").
 *
 * Returns 0 on success, or -ENOEXEC if a section other than SHT_NOBITS
 * extends past the end of the image.
 */
static int rewrite_section_headers(struct load_info *info)
{
unsigned int i;
/* This should always be true, but let's be sure. */
info->sechdrs[0].sh_addr = 0;
for (i = 1; i < info->hdr->e_shnum; i++) {
Elf_Shdr *shdr = &info->sechdrs[i];
if (shdr->sh_type != SHT_NOBITS
&& info->len < shdr->sh_offset + shdr->sh_size) {
printk(KERN_ERR "Module len %lu truncated\n",
info->len);
return -ENOEXEC;
}
/* Mark all sections sh_addr with their address in the
temporary image. */
shdr->sh_addr = (size_t)info->hdr + shdr->sh_offset;
/* Debug trace. NOTE(review): sh_addr is printed with %x; confirm this
 * matches the width of the address type on the target. */
printk(KERN_ERR "section:%s sh_addr: %x\n", info->secstrings + shdr->sh_name, shdr->sh_addr);
#ifndef CONFIG_MODULE_UNLOAD
/* Don't load .exit sections */
if (strstarts(info->secstrings+shdr->sh_name, ".exit"))
shdr->sh_flags &= ~(unsigned long)SHF_ALLOC;
#endif
}
/* Track but don't keep modinfo and version sections. */
info->index.vers = find_sec(info, "__versions");
info->index.info = find_sec(info, ".modinfo");
info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
return 0;
}
rewrite_section_headers()返回後,將符號表和字符串表的信息記錄在info中。
info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
這裏將.gnu.linkonce.this_module段在段表中的下標記錄在info->index.mod中。
mod = (void *)info->sechdrs[info->index.mod].sh_addr;
現在,mod指針就指向臨時空間中的.gnu.linkonce.this_module段的地址了,而.gnu.linkonce.this_module段的內容是編譯器生成的並初始化的,因此struct module這個結構體的初始值相當於編譯時就設置好了。不信可以打印mod->name看看,就是KO文件的文件名。
好了,現在程序執行完setup_load_info()返回到layout_and_allocate(),接着layout_and_allocate()調用layout_sections()。
/*
 * Give every SHF_ALLOC section an offset inside either the core region or
 * the init region. The offset is stored in the section's sh_entsize (init
 * offsets are tagged with INIT_OFFSET_MASK) and the running region sizes
 * are accumulated in mod->core_size and mod->init_size.
 *
 * Sections are placed in the order of the rows of masks[]; the region
 * size reached after rows 0 and 1 is recorded as the text size and the
 * read-only size respectively.
 */
static void layout_sections(struct module *mod, struct load_info *info)
{
/* Each row is { flags that must all be set, flags that must all be clear }. */
static unsigned long const masks[][2] = {
/* NOTE: all executable code must be the first section
* in this array; otherwise modify the text_size
* finder in the two loops below */
{ SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL },
{ SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL },
{ SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL },
{ ARCH_SHF_SMALL | SHF_ALLOC, 0 }
};
unsigned int m, i;
/* ~0UL in sh_entsize marks a section that has not been placed yet. */
for (i = 0; i < info->hdr->e_shnum; i++)
info->sechdrs[i].sh_entsize = ~0UL;
pr_debug("Core section allocation order:\n");
printk(KERN_ERR "masks array size: %d\n", ARRAY_SIZE(masks));
/* First pass: sections whose name does not start with ".init" go to core. */
for (m = 0; m < ARRAY_SIZE(masks); ++m) {
for (i = 0; i < info->hdr->e_shnum; ++i) {
Elf_Shdr *s = &info->sechdrs[i];
const char *sname = info->secstrings + s->sh_name;
printk(KERN_ERR "sname : %s\n", sname);
/* Skip sections that do not match this row, are already placed,
 * or belong to the init pass. */
if ((s->sh_flags & masks[m][0]) != masks[m][0]
|| (s->sh_flags & masks[m][1])
|| s->sh_entsize != ~0UL
|| strstarts(sname, ".init")) {
printk(KERN_ERR "skip..\n");
continue;
}
printk(KERN_ERR "core_size: %d\n", mod->core_size);
s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
printk(KERN_ERR "sh_entsize: %d, core_size: %d\n", s->sh_entsize, mod->core_size);
pr_debug("\t%s\n", sname);
}
switch (m) {
case 0: /* executable */
mod->core_size = debug_align(mod->core_size);
printk(KERN_ERR "case 0 core_size = %d\n", mod->core_size);
mod->core_text_size = mod->core_size;
break;
case 1: /* RO: text and ro-data */
mod->core_size = debug_align(mod->core_size);
printk(KERN_ERR "case 1 core_size = %d\n", mod->core_size);
mod->core_ro_size = mod->core_size;
break;
case 3: /* whole core */
mod->core_size = debug_align(mod->core_size);
break;
}
}
printk(KERN_ERR "parse init..\n");
pr_debug("Init section allocation order:\n");
/* Second pass: same procedure for the ".init" sections, into the init region. */
for (m = 0; m < ARRAY_SIZE(masks); ++m) {
for (i = 0; i < info->hdr->e_shnum; ++i) {
Elf_Shdr *s = &info->sechdrs[i];
const char *sname = info->secstrings + s->sh_name;
printk(KERN_ERR "sname : %s\n", sname);
if ((s->sh_flags & masks[m][0]) != masks[m][0]
|| (s->sh_flags & masks[m][1])
|| s->sh_entsize != ~0UL
|| !strstarts(sname, ".init")) {
printk(KERN_ERR "skip..\n");
continue;
}
printk(KERN_ERR "init_size: %d\n", mod->init_size);
/* INIT_OFFSET_MASK tags the offset as relative to the init region. */
s->sh_entsize = (get_offset(mod, &mod->init_size, s, i)
| INIT_OFFSET_MASK);
printk(KERN_ERR "sh_entsize: %d, init_size: %d\n", s->sh_entsize, mod->init_size);
pr_debug("\t%s\n", sname);
}
switch (m) {
case 0: /* executable */
mod->init_size = debug_align(mod->init_size);
mod->init_text_size = mod->init_size;
break;
case 1: /* RO: text and ro-data */
mod->init_size = debug_align(mod->init_size);
mod->init_ro_size = mod->init_size;
break;
case 3: /* whole init */
mod->init_size = debug_align(mod->init_size);
break;
}
}
}
layout_sections()利用了struct module 裏的兩個成員變量:core_size 和 init_size,後面會看到,kernel爲ko文件分配最終虛擬地址的時候,實際上分配了兩塊地址,一塊叫core,另一塊叫init, 這兩個變量分別記錄了這兩塊地址的size。一個內核模塊爲什麼要分配兩塊地址呢?這是考慮到內核模塊的__init函數只運行一次,所以將它單獨放在一塊內存中可以方便運行結束後,回收這塊內存。__init函數就是用 __init 宏標記的函數, #define __init __section(.init.text) ,編譯器會將它放入ko文件的.init.text段中。
第一個for循環將所有段的sh_entsize設置爲一個特殊值——0xffffffff。這是個標記,凡是sh_entsize等於這個值的段,就是還未被分配虛擬空間偏移的段。
前面說了,爲ko文件分配的最終虛擬地址有兩塊,core空間和init空間,core_size和init_size記錄了這兩個空間的size,初始值爲0。
第二個for循環爲所有具有SHF_ALLOC標誌,並且非.init的段分配其在core虛擬空間的偏移,這些段後面將會被複制到core虛擬空間,這是不會被自動釋放,常駐內核的空間。
s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
每個段在core空間的偏移記錄在sh_entsize中,偏移是通過get_offset得到的,這個函數很簡單,arch_mod_section_prepend()函數爲每個段額外的分配幾個byte,可以先認爲返回0,第一次調用傳入的*size爲0,ALIGN()宏是對齊用的宏,一般仍然返回*size,所以可以簡化爲ret = *size; 接着將core_size加上這個段的size,返回。後面再爲下一個段分配偏移的時候,core_size已經非0了,分配的偏移就是*size的大小,每次分配core_size都增加相應的段的size。static long get_offset(struct module *mod, unsigned int *size,
Elf_Shdr *sechdr, unsigned int section)
{
/*
 * Reserve room for section sechdr in the region whose running size is
 * *size: add the architecture-specific prepend, round up to the section's
 * alignment (1 when sh_addralign is 0), advance *size past the section,
 * and return the aligned offset at which the section starts.
 */
long ret;
*size += arch_mod_section_prepend(mod, section);
ret = ALIGN(*size, sechdr->sh_addralign ?: 1);
*size = ret + sechdr->sh_size;
/* Debug trace of the alignment, the chosen offset and the section size. */
printk(KERN_ERR "sh_addralign: %d, ret: %ld, sh_size: %d\n", sechdr->sh_addralign, ret, sechdr->sh_size);
return ret;
}
當第二個for循環完畢,第三個for循環就爲.init段分配其在init虛擬空間的偏移,分配方法和前面一樣,然後返回layout_and_allocate()函數。layout_and_allocate()函數接着調用layout_symtab()爲符號表和字符串表分配虛擬空間。symsect和strsect分別是表示符號表和字符串表的段描述符。符號表和字符串表會在core空間與init空間同時分配。
/*
 * Lay out the symbol table and its string table.
 *
 * Both sections are marked SHF_ALLOC and given offsets in the init region.
 * In addition, space is reserved at the end of the core region for the
 * "core" symbols and their names; those offsets are stored in
 * info->symoffs and info->stroffs rather than in sh_entsize.
 */
static void layout_symtab(struct module *mod, struct load_info *info)
{
Elf_Shdr *symsect = info->sechdrs + info->index.sym;
Elf_Shdr *strsect = info->sechdrs + info->index.str;
const Elf_Sym *src;
unsigned int i, nsrc, ndst, strtab_size;
/* Put symbol section at end of init part of module. */
symsect->sh_flags |= SHF_ALLOC;
symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
info->index.sym) | INIT_OFFSET_MASK;
printk(KERN_ERR "symsect->sh_entsize: %d\n", symsect->sh_entsize);
pr_debug("\t%s\n", info->secstrings + symsect->sh_name);
/* Walk the symbol table in the temporary copy of the image. */
src = (void *)info->hdr + symsect->sh_offset;
nsrc = symsect->sh_size / sizeof(*src);
printk(KERN_ERR "symb num : %d\n", nsrc);
/* strtab always starts with a nul, so offset 0 is the empty string. */
strtab_size = 1;
/* Compute total space required for the core symbols' strtab. */
/* Entry 0 is always counted; ndst ends up as the number of symbols kept. */
for (ndst = i = 0; i < nsrc; i++) {
if (i == 0 ||
is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
strtab_size += strlen(&info->strtab[src[i].st_name])+1;
ndst++;
}
}
/* Append room for core symbols at end of core part. */
info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
mod->core_size += strtab_size;
/* Put string table section at end of init part of module. */
strsect->sh_flags |= SHF_ALLOC;
strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
info->index.str) | INIT_OFFSET_MASK;
pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
}
先爲符號表在init空間分配偏移:
symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
info->index.sym) | INIT_OFFSET_MASK;
接着爲“core符號”及其對應的字符串在core空間分配偏移,其實就是隻將部分符號表在core空間分配偏移,遍歷符號表,對每個符號表項調用is_core_symbol()函數判斷是否爲“core符號”,如果是,爲core符號對應的字符串分配空間,字符串空間記錄在strtab_size中。
src = (void *)info->hdr + symsect->sh_offset;
nsrc = symsect->sh_size / sizeof(*src);
printk(KERN_ERR "symb num : %d\n", nsrc);
/* strtab always starts with a nul, so offset 0 is the empty string. */
strtab_size = 1;
/* Compute total space required for the core symbols' strtab. */
for (ndst = i = 0; i < nsrc; i++) {
if (i == 0 ||
is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
strtab_size += strlen(&info->strtab[src[i].st_name])+1;
ndst++;
}
}
下面這三句就爲core空間的符號表與字符串表分配好了偏移。(注意:分配的偏移沒有記錄在sh_entsize中,只是記錄在info結構體中,也就是說這裏只是爲core空間的符號表與字符串表預留好位置,move_module()搬移段的時候不會將符號表、字符串表複製到core空間來)
info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
mod->core_size += strtab_size;
最後爲字符串表分配init空間的偏移。
strsect->sh_flags |= SHF_ALLOC;
strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
info->index.str) | INIT_OFFSET_MASK;
返回layout_and_allocate()函數。調用move_module函數進行虛擬空間的實際申請,和段的加載操作。
/*
 * Allocate the core and init regions of the module and copy every
 * SHF_ALLOC section from the temporary image to its final address.
 *
 * On return each copied section's sh_addr holds its final address.
 * Returns 0 on success or -ENOMEM if an allocation fails.
 */
static int move_module(struct module *mod, struct load_info *info)
{
int i;
void *ptr;
/* Do the allocs. */
ptr = module_alloc_update_bounds(mod->core_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. Just mark it as not being a
* leak.
*/
kmemleak_not_leak(ptr);
if (!ptr)
return -ENOMEM;
memset(ptr, 0, mod->core_size);
mod->module_core = ptr;
ptr = module_alloc_update_bounds(mod->init_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. This block doesn't need to be
* scanned as it contains data and code that will be freed
* after the module is initialized.
*/
kmemleak_ignore(ptr);
/* A NULL result is only treated as an error when an init region was requested. */
if (!ptr && mod->init_size) {
module_free(mod, mod->module_core);
return -ENOMEM;
}
memset(ptr, 0, mod->init_size);
mod->module_init = ptr;
/* Transfer each section which specifies SHF_ALLOC */
pr_debug("final section addresses:\n");
for (i = 0; i < info->hdr->e_shnum; i++) {
void *dest;
Elf_Shdr *shdr = &info->sechdrs[i];
if (!(shdr->sh_flags & SHF_ALLOC))
continue;
/* sh_entsize holds the offset chosen during layout; INIT_OFFSET_MASK
 * selects the init region, otherwise the core region is used. */
if (shdr->sh_entsize & INIT_OFFSET_MASK)
dest = mod->module_init
+ (shdr->sh_entsize & ~INIT_OFFSET_MASK);
else
dest = mod->module_core + shdr->sh_entsize;
/* Debug trace. NOTE(review): dest is a pointer but is printed with %x;
 * %p looks like the intended specifier - confirm. */
printk(KERN_ERR "name: %s, dest : %x\n",info->secstrings + shdr->sh_name, dest);
/* SHT_NOBITS sections are not copied; the region was zeroed above. */
if (shdr->sh_type != SHT_NOBITS)
memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
/* Update sh_addr to point to copy in image. */
shdr->sh_addr = (unsigned long)dest;
pr_debug("\t0x%lx %s\n",
(long)shdr->sh_addr, info->secstrings + shdr->sh_name);
}
return 0;
}
先爲core空間申請一塊大小爲core_size的內存,將其首地址賦值給struct module結構體的module_core成員:
ptr = module_alloc_update_bounds(mod->core_size);
mod->module_core = ptr;
再爲init空間申請一塊大小爲init_size的內存,將其首地址賦值給struct module結構體的module_init成員:
ptr = module_alloc_update_bounds(mod->init_size);
mod->module_init = ptr;
下面的for循環對每個有SHF_ALLOC標記的段分配絕對虛擬地址(前面分配的只是各個段相對於未來要分配的虛擬地址的偏移,也就是相對於module_core和module_init的偏移)。分配絕對虛擬地址很簡單,將申請的虛擬空間的地址(分別保存在module_core和module_init中)直接加上之前分配好的偏移量就行了。如下:(符號表和字符串表的絕對虛擬地址都被分配到了init空間內,所以後面搬移的時候是把這兩個表搬移到了init空間而非core空間)
if (shdr->sh_entsize & INIT_OFFSET_MASK)
dest = mod->module_init
+ (shdr->sh_entsize & ~INIT_OFFSET_MASK);
else
dest = mod->module_core + shdr->sh_entsize;
開始段的搬移,將段從臨時內核空間,搬移到運行時的虛擬地址上去:
if (shdr->sh_type != SHT_NOBITS)
memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
最後把絕對虛擬地址賦值給相應段表項的sh_addr成員。返回。
到目前爲止,ko文件的對應的段,已經被加載進了內核空間,每個被加載的段都有了自己的運行時地址。但現在還不能運行,因爲代碼中對符號的引用還沒有修正,也就是還沒有進行符號重定位。前面說過ko文件類似於.o文件,.o文件對全局符號的引用都是待重定位的,是需要鏈接器對符號進行鏈接的,ko文件也一樣。只不過ko文件代碼中對符號的引用是由內核來進行重定位的。
move_module()返回後,返回到layout_and_allocate()函數中。layout_and_allocate()函數最後將mod指針變量重新指向搬移後的.gnu.linkonce.this_module段的虛擬地址值。
mod = (void *)info->sechdrs[info->index.mod].sh_addr;
現在layout_and_allocate()函數執行完了,返回到load_module函數繼續往下執行,
find_module_sections(mod, &info);
這主要是對info進一步初始化。
接着調用simplify_symbols()函數,這個函數將符號表裏的符號的絕對地址寫入到st_value域中(符號表、字符串表現在都在init空間了)。
for循環遍歷init空間的符號表,分析每個符號表項的st_shndx域,st_shndx通常表示符號所在的段,但它有三個特殊值:SHN_ABS,SHN_COMMON,SHN_UNDEF。所以函數中分了4種case來進行處理。SHN_COMMON和SHN_ABS的case先不考慮,因爲我們的代碼中的符號主要分兩種,模塊內的符號和內核export出的符號(如printk),這兩種符號的st_shndx大部分對應default和SHN_UNDEF這兩種case。
對於模塊內的符號,程序進入default進行處理,處理很簡單,st_value = st_value + 符號所在段的絕對虛擬地址(st_value中原本保存着符號在其所在段的offset)。這樣一來,st_value中現在保存的就是符號的絕對虛擬地址了。
對於內核導出的符號,由於它在模塊中沒有定義,所以它的st_shndx爲SHN_UNDEF。對於SHN_UNDEF 這種case的處理過程如下:
1:調用resolve_symbol_wait()函數解析內核符號,這個函數返回一個struct kernel_symbol結構體。
2:將這個結構體的value成員直接賦值給st_value。
結構體定義在include/linux/export.h中:
/* One entry of an exported-symbol table. */
struct kernel_symbol
{
/* Value of the symbol; simplify_symbols() copies it into st_value. */
unsigned long value;
/* Symbol name; presumably the key the tables are searched by - see cmp_name, not shown. */
const char *name;
};
/*
 * Turn the st_value of every symbol in the module's symbol table into an
 * absolute address.
 *
 * Symbols defined in a section get that section's base address added;
 * undefined symbols are looked up with resolve_symbol_wait(); absolute
 * symbols are left alone; common symbols are rejected.
 *
 * Returns 0 on success or a negative errno. The loop does not stop at the
 * first failure, so every problem symbol is reported.
 */
static int simplify_symbols(struct module *mod, const struct load_info *info)
{
Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
Elf_Sym *sym = (void *)symsec->sh_addr;
unsigned long secbase;
unsigned int i;
int ret = 0;
const struct kernel_symbol *ksym;
/* Entry 0 of the symbol table is skipped. */
for (i = 1; i < symsec->sh_size / sizeof(Elf_Sym); i++) {
const char *name = info->strtab + sym[i].st_name;
printk(KERN_ERR "symb name: %s\n", name);
switch (sym[i].st_shndx) {
case SHN_COMMON:
/* We compiled with -fno-common. These are not
supposed to happen. */
pr_debug("Common symbol: %s\n", name);
printk("%s: please compile with -fno-common\n",
mod->name);
ret = -ENOEXEC;
break;
case SHN_ABS:
/* Don't need to do anything */
pr_debug("Absolute symbol: 0x%08lx\n",
(long)sym[i].st_value);
break;
case SHN_UNDEF:
printk(KERN_ERR "Undefine symb!!\n");
ksym = resolve_symbol_wait(mod, info, name);
printk(KERN_ERR "resolve ok!\n");
/* Ok if resolved. */
if (ksym && !IS_ERR(ksym)) {
sym[i].st_value = ksym->value;
break;
}
/* Ok if weak. */
if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK)
break;
printk(KERN_WARNING "%s: Unknown symbol %s (err %li)\n",
mod->name, name, PTR_ERR(ksym));
/* A NULL ksym (symbol not found) is reported as -ENOENT. */
ret = PTR_ERR(ksym) ?: -ENOENT;
break;
default:
/* Divert to percpu allocation if a percpu var. */
if (sym[i].st_shndx == info->index.pcpu)
secbase = (unsigned long)mod_percpu(mod);
else
secbase = info->sechdrs[sym[i].st_shndx].sh_addr;
/* Debug traces. NOTE(review): secbase is unsigned long but is printed
 * with %x; confirm the specifier for the target. */
printk(KERN_ERR "section base: %x\n", secbase);
sym[i].st_value += secbase;
printk(KERN_ERR "st_value: %x\n", sym[i].st_value);
break;
}
}
return ret;
}
resolve_symbol_wait()函數用來解析內核導出的符號。不是所有的內核符號都默認導出的,默認內核中的符號在運行時是對外“不可見的”,而內核本身對那些符號地址的引用,都是靜態編譯鏈接內核時,鏈接器寫進去的。所以外部模塊無法得到內核符號的地址。如果外部模塊想要引用內核符號,除非內核將符號地址導出來!內核中的符號可以通過EXPORT_SYMBOL()宏來導出,這個宏就是將符號信息保存在一個struct kernel_symbol結構體中,再將這個結構體編譯進內核的一個特殊段,以後如果外部想引用這個符號,只需要在這個段中尋找對應的符號的struct kernel_symbol結構體就行了。更詳細的細節google上有很多。
現在來看這個resolve_symbol_wait()函數:
/*
 * Resolve symbol name for module mod, retrying while resolve_symbol()
 * reports -EBUSY.
 *
 * resolve_symbol() is evaluated as part of the wait condition on
 * module_wq; the wait ends once the result is not an error or is an error
 * other than -EBUSY, with a timeout of 30 * HZ. A warning is logged when
 * the wait returns a value <= 0.
 *
 * Returns whatever the last resolve_symbol() call returned.
 */
static const struct kernel_symbol *
resolve_symbol_wait(struct module *mod,
const struct load_info *info,
const char *name)
{
const struct kernel_symbol *ksym;
char owner[MODULE_NAME_LEN];
if (wait_event_interruptible_timeout(module_wq,
!IS_ERR(ksym = resolve_symbol(mod, info, name, owner))
|| PTR_ERR(ksym) != -EBUSY,
30 * HZ) <= 0) {
printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n",
mod->name, owner);
}
return ksym;
}
resolve_symbol_wait()函數會先調用resolve_symbol()解析內核導出符號。
因此進入resolve_symbol()函數:
/*
 * Look up exported symbol name on behalf of module mod, under module_mutex.
 *
 * When the symbol is found, its version is checked and mod's use of the
 * owning module is recorded with ref_module(); ownername then receives
 * module_name(owner) on success as well as on those two failures.
 *
 * Returns the symbol, NULL if it was not found, ERR_PTR(-EINVAL) if the
 * version check fails, or the ERR_PTR()-encoded error from ref_module().
 */
static const struct kernel_symbol *resolve_symbol(struct module *mod,
const struct load_info *info,
const char *name,
char ownername[])
{
struct module *owner;
const struct kernel_symbol *sym;
const unsigned long *crc;
int err;
printk(KERN_ERR "resolve symbol: %s...\n", name);
mutex_lock(&module_mutex);
/* The fourth argument (gplok) is true unless mod carries the
 * proprietary-module taint. */
sym = find_symbol(name, &owner, &crc,
!(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true);
if (!sym)
goto unlock;
if (!check_version(info->sechdrs, info->index.vers, name, mod, crc,
owner)) {
sym = ERR_PTR(-EINVAL);
goto getname;
}
err = ref_module(mod, owner);
if (err) {
sym = ERR_PTR(err);
goto getname;
}
getname:
/* We must make copy under the lock if we failed to get ref. */
strncpy(ownername, module_name(owner), MODULE_NAME_LEN);
unlock:
mutex_unlock(&module_mutex);
return sym;
}
resolve_symbol()函數繼續調用find_symbol()函數,返回一個struct kernel_symbol結構指針。
/*
 * Find the exported symbol called @name.
 *
 * @owner: if non-NULL, receives the owner recorded for the match
 * @crc:   if non-NULL, receives the crc pointer recorded for the match
 * @gplok: forwarded to the per-table check through the search argument
 * @warn:  forwarded to the per-table check through the search argument
 *
 * Returns the matching kernel_symbol, or NULL when no table accepts it.
 */
const struct kernel_symbol *find_symbol(const char *name,
					struct module **owner,
					const unsigned long **crc,
					bool gplok,
					bool warn)
{
	struct find_symbol_arg query;

	query.warn = warn;
	query.gplok = gplok;
	query.name = name;

	/* Nothing matched in any table: report and bail out. */
	if (!each_symbol_section(find_symbol_in_section, &query)) {
		pr_debug("Failed to find symbol %s\n", name);
		return NULL;
	}

	/* The out-parameters are optional. */
	if (owner)
		*owner = query.owner;
	if (crc)
		*crc = query.crc;

	return query.sym;
}
進入find_symbol()函數,函數內部先構造一個搜索符號的參數 fsa,並根據要解析的內核符號初始化這個參數。接着以&fsa和find_symbol_in_section這個函數指針爲參數調用each_symbol_section()函數。
/*
 * Call fn on every exported-symbol table: first the tables of the static
 * array below, then the tables of each module on the modules list.
 *
 * For the first group fn receives a NULL owner; for the second it receives
 * the module the table belongs to. Returns true as soon as a call made
 * through each_symbol_in_section() returns true, false if none does.
 */
bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
struct module *owner,
void *data),
void *data)
{
struct module *mod;
/* Tables bounded by the __start_ and __stop_ symbols. */
static const struct symsearch arr[] = {
{ __start___ksymtab, __stop___ksymtab, __start___kcrctab,
NOT_GPL_ONLY, false },
{ __start___ksymtab_gpl, __stop___ksymtab_gpl,
__start___kcrctab_gpl,
GPL_ONLY, false },
{ __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future,
__start___kcrctab_gpl_future,
WILL_BE_GPL_ONLY, false },
#ifdef CONFIG_UNUSED_SYMBOLS
{ __start___ksymtab_unused, __stop___ksymtab_unused,
__start___kcrctab_unused,
NOT_GPL_ONLY, true },
{ __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl,
__start___kcrctab_unused_gpl,
GPL_ONLY, true },
#endif
};
if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
return true;
list_for_each_entry_rcu(mod, &modules, list) {
/* Same layout as the static array above, built from this module's
 * own symbol tables. */
struct symsearch arr[] = {
{ mod->syms, mod->syms + mod->num_syms, mod->crcs,
NOT_GPL_ONLY, false },
{ mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms,
mod->gpl_crcs,
GPL_ONLY, false },
{ mod->gpl_future_syms,
mod->gpl_future_syms + mod->num_gpl_future_syms,
mod->gpl_future_crcs,
WILL_BE_GPL_ONLY, false },
#ifdef CONFIG_UNUSED_SYMBOLS
{ mod->unused_syms,
mod->unused_syms + mod->num_unused_syms,
mod->unused_crcs,
NOT_GPL_ONLY, true },
{ mod->unused_gpl_syms,
mod->unused_gpl_syms + mod->num_unused_gpl_syms,
mod->unused_gpl_crcs,
GPL_ONLY, true },
#endif
};
if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data))
return true;
}
return false;
}
這個函數裏有個static數組,數組元素是struct symsearch結構體,這個結構體是用來描述一個內核符號表的,start stop 分別描述表的頭尾,crcs暫時不管,licence描述符號的“證書”。
/* Describes one exported-symbol table for the symbol search. */
struct symsearch {
/* First entry of the table and the position one past its last entry. */
const struct kernel_symbol *start, *stop;
/* CRC table handed to symversion() together with the symbol's index. */
const unsigned long *crcs;
/* Licence class of the table; check_symbol() refuses GPL_ONLY tables
 * when the searcher's gplok is false. */
enum {
NOT_GPL_ONLY,
GPL_ONLY,
WILL_BE_GPL_ONLY,
} licence;
/* True for the "unused" tables; check_symbol() warns when one is used. */
bool unused;
};
可以看出內核定義了3~5個內核符號表(定義了CONFIG_UNUSED_SYMBOLS時爲5個,否則爲3個)。這些描述符號表的結構體的成員都已經賦了值,像__start___ksymtab,__stop___ksymtab等等,這些值定義在arch/arm/kernel/vmlinux.lds中,也就是定義在鏈接腳本中。前面說過,內核符號通過EXPORT_SYMBOL()宏導出到一個特殊段,在鏈接內核的時候,鏈接腳本就將這些段合併爲幾個內核符號表段,並定義了幾個標誌開始和結束地址的符號,如__start___ksymtab,__stop___ksymtab就標識了__ksymtab符號表段的開始地址和結束地址。
static const struct symsearch arr[] = {
{ __start___ksymtab, __stop___ksymtab, __start___kcrctab,
NOT_GPL_ONLY, false },
{ __start___ksymtab_gpl, __stop___ksymtab_gpl,
__start___kcrctab_gpl,
GPL_ONLY, false },
{ __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future,
__start___kcrctab_gpl_future,
WILL_BE_GPL_ONLY, false },
#ifdef CONFIG_UNUSED_SYMBOLS
{ __start___ksymtab_unused, __stop___ksymtab_unused,
__start___kcrctab_unused,
NOT_GPL_ONLY, true },
{ __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl,
__start___kcrctab_unused_gpl,
GPL_ONLY, true },
#endif
};
接着回到each_symbol_section()函數,函數調用each_symbol_in_section(),參數是這個static數組指針和數組的size,owner爲NULL,*fn就是 find_symbol_in_section,data其實是&fsa這個指針。each_symbol_in_section()這個函數做的事很簡單,就是遍歷arr數組中的每個內核符號表,調用find_symbol_in_section函數在每個內核符號表裏搜索&fsa指定的符號。
/*
 * Apply @fn to each of the @arrsize tables in @arr, in order, passing
 * @owner and @data through unchanged.
 *
 * Returns true as soon as one call returns true (later tables are not
 * visited), false when every call returned false or @arrsize is 0.
 */
static bool each_symbol_in_section(const struct symsearch *arr,
				   unsigned int arrsize,
				   struct module *owner,
				   bool (*fn)(const struct symsearch *syms,
					      struct module *owner,
					      void *data),
				   void *data)
{
	const struct symsearch *table = arr;
	unsigned int remaining = arrsize;

	while (remaining--) {
		if (fn(table, owner, data))
			return true;
		table++;
	}

	return false;
}
find_symbol_in_section函數如下,先將data指針轉化爲struct find_symbol_arg結構體指針,再調用bsearch在syms描述的內核符號表裏搜索fsa指定的內核符號,我們假設搜索到了這個符號,搜索的結果就存放在struct kernel_symbol這個結構體中。
/*
 * Search one exported-symbol table for the symbol named in the
 * find_symbol_arg passed through @data.
 *
 * @syms:  table to search; entries in [start, stop) are binary-searched
 *         with cmp_name
 * @owner: owner to record for a match
 * @data:  the struct find_symbol_arg describing the search
 *
 * Returns true when the symbol is present and check_symbol() accepts it
 * (check_symbol() then stores the result in the search argument), false
 * otherwise.
 */
static bool find_symbol_in_section(const struct symsearch *syms,
				   struct module *owner,
				   void *data)
{
	struct find_symbol_arg *fsa = data;
	const struct kernel_symbol *sym;

	sym = bsearch(fsa->name, syms->start, syms->stop - syms->start,
		      sizeof(struct kernel_symbol), cmp_name);
	if (sym == NULL)
		return false;

	/* Debug trace; value is an unsigned long, hence %lx. */
	printk(KERN_ERR "name: %s, sym value: %lx\n", fsa->name, sym->value);

	/* The symbol's index within the table selects its CRC entry. */
	return check_symbol(syms, owner, sym - syms->start, data);
}
bsearch函數就是用“二分法”查表,不說了。。
/*
 * Binary search of a sorted array.
 *
 * @key:  item being looked for, passed as the first argument of @cmp
 * @base: first element of the array
 * @num:  number of elements
 * @size: size of one element in bytes
 * @cmp:  returns <0, 0 or >0 when @key sorts before, equal to or after
 *        the element it is given
 *
 * Returns a pointer to an element for which @cmp returns 0, or NULL if
 * there is none (including when @num is 0).
 */
void *bsearch(const void *key, const void *base, size_t num, size_t size,
	      int (*cmp)(const void *key, const void *elt))
{
	const char *elems = base;
	size_t lo = 0;
	size_t hi = num;

	/* Invariant: a match, if any, lies in the half-open range [lo, hi). */
	while (lo < hi) {
		size_t probe = lo + (hi - lo) / 2;
		const char *candidate = elems + probe * size;
		int order = cmp(key, candidate);

		if (order == 0)
			return (void *)candidate;

		if (order < 0)
			hi = probe;
		else
			lo = probe + 1;
	}

	return NULL;
}
bsearch返回後,調用check_symbol()函數,這個函數在返回前將搜索到的內核導出符號的struct kernel_symbol結構指針賦值給fsa->sym。static bool check_symbol(const struct symsearch *syms,
struct module *owner,
unsigned int symnum, void *data)
{
/*
 * Decide whether entry symnum of table syms may be handed to the searcher
 * described by data (a struct find_symbol_arg) and, if so, record the
 * owner, crc and symbol in it.
 *
 * Returns false only when the table is GPL_ONLY and the searcher's gplok
 * is false; in every other case it returns true, possibly after printing
 * warnings when the searcher's warn flag is set.
 */
struct find_symbol_arg *fsa = data;
if (!fsa->gplok) {
if (syms->licence == GPL_ONLY)
return false;
if (syms->licence == WILL_BE_GPL_ONLY && fsa->warn) {
printk(KERN_WARNING "Symbol %s is being used "
"by a non-GPL module, which will not "
"be allowed in the future\n", fsa->name);
printk(KERN_WARNING "Please see the file "
"Documentation/feature-removal-schedule.txt "
"in the kernel source tree for more details.\n");
}
}
#ifdef CONFIG_UNUSED_SYMBOLS
if (syms->unused && fsa->warn) {
printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
"however this module is using it.\n", fsa->name);
printk(KERN_WARNING
"This symbol will go away in the future.\n");
printk(KERN_WARNING
"Please evalute if this is the right api to use and if "
"it really is, submit a report the linux kernel "
"mailinglist together with submitting your code for "
"inclusion.\n");
}
#endif
/* Publish the result through the search argument. */
fsa->owner = owner;
fsa->crc = symversion(syms->crcs, symnum);
fsa->sym = &syms->start[symnum];
return true;
}
check_symbol()函數返回後,find_symbol_in_section函數也返回true,並向上層層返回到find_symbol(),find_symbol()函數將fsa->sym返回至resolve_symbol()函數,接着resolve_symbol()函數調用ref_module(),第一個參數a是現在正在加載的模塊,第二個參數b是a模塊用到的模塊(也就是導出該符號的模塊,即check_symbol()裏記錄的fsa->owner);由於這個符號是內核本身導出的,不屬於任何模塊,所以此處爲NULL。
/*
 * Record that module @a uses module @b.
 *
 * Returns 0 when there is nothing to do (@b is NULL, or already_uses()
 * reports that the dependency is already known) or when the usage was
 * recorded.  Otherwise returns the error from strong_try_module_get() or
 * add_module_usage().
 */
int ref_module(struct module *a, struct module *b)
{
	int ret;

	if (!b)
		return 0;
	if (already_uses(a, b))
		return 0;

	/* If module isn't available, we fail. */
	ret = strong_try_module_get(b);
	if (ret != 0)
		return ret;

	/* Drop the reference just taken if the usage cannot be recorded. */
	ret = add_module_usage(a, b);
	if (ret != 0)
		module_put(b);

	return ret;
}
因爲這裏b = NULL,所以這裏直接返回0;
返回到resolve_symbol(),resolve_symbol()最後將描述內核導出符號的struct kernel_symbol結構體的指針sym返回。
現在我們已經返回到resolve_symbol_wait()函數了,接下來調用wait_event_interruptible_timeout(),因爲我們剛纔已經得到了內核導出符號,那麼現在就不用睡眠等待,否則會睡眠。最後resolve_symbol_wait()返回resolve_symbol()剛剛返回的sym。
向上返回到simplify_symbols(),前面說過,將這個struct kernel_symbol結構體的value成員直接賦值給st_value。然後返回。
這樣繞了一大圈,simplify_symbols()終於返回了。。。simplify_symbols()返回後,在init空間的符號表的每個符號表項中,st_value域就指向符號的絕對虛擬地址值了。-_-
接下來load_module()調用apply_relocations()進行真正的重定位工作。
/*
 * Run every relocation section of the module image.
 *
 * @mod:  module being loaded; forwarded to the arch relocation helpers.
 * @info: load-time view of the ELF image (header, section headers,
 *        section-name strings, string table, symbol-table index).
 *
 * Walks sections 1..e_shnum-1.  A section is treated as a relocation table
 * when its sh_info names a valid, SHF_ALLOC target section and its type is
 * SHT_REL or SHT_RELA.  Stops at the first helper that returns a negative
 * value.
 *
 * Returns 0 on success, otherwise the negative value from
 * apply_relocate() / apply_relocate_add().
 */
static int apply_relocations(struct module *mod, const struct load_info *info)
{
	unsigned int i;
	int err = 0;

	/* Now do relocations. */
	for (i = 1; i < info->hdr->e_shnum; i++) {
		unsigned int infosec = info->sechdrs[i].sh_info;

		/* Not a valid relocation section? */
		if (infosec >= info->hdr->e_shnum)
			continue;

		/* Don't bother with non-allocated sections */
		if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC))
			continue;

		if (info->sechdrs[i].sh_type == SHT_REL) {
			printk(KERN_ERR "relocate section : %s, type: %s\n", info->secstrings + (info->sechdrs[i].sh_name), "SHT_REL");
			err = apply_relocate(info->sechdrs, info->strtab,
					     info->index.sym, i, mod);
			/*
			 * Debug trace: mod->init is a function pointer, so it
			 * is printed with %p (not %x), as sys_init_module()
			 * does for the same field.
			 */
			printk(KERN_ERR "mod init: %p\n", mod->init);
		} else if (info->sechdrs[i].sh_type == SHT_RELA) {
			printk(KERN_ERR "relocate section : %s, type: %s\n", info->secstrings + (info->sechdrs[i].sh_name), "SHT_RELA");
			err = apply_relocate_add(info->sechdrs, info->strtab,
						 info->index.sym, i, mod);
		}
		if (err < 0)
			break;
	}
	return err;
}
for循環遍歷臨時內核空間的各個段,篩選出其中有效的重定位表段,對重定位表所作用的段進行重定位。重定位段的類型主要有SHT_REL和SHT_RELA,以SHT_REL爲例,當重定位表段的類型是SHT_REL時,調用apply_relocate()進行重定位。apply_relocate()位於arch/arm/kernel/module.c中。
/*
 * Apply the Elf32_Rel entries of section 'relindex' to the section that its
 * sh_info field designates.
 *
 * sechdrs:  array of section headers; sh_addr of each is used as the
 *           in-memory address of that section's contents.
 * strtab:   string table; only used to fetch symbol names for error messages.
 * symindex: index of the symbol table section within sechdrs.
 * relindex: index of the relocation section within sechdrs.
 * module:   module being relocated; only its name is used, in messages.
 *
 * Returns 0 when every entry was applied, or -ENOEXEC on a bad symbol index,
 * an out-of-bounds relocation offset, an out-of-range branch target, or an
 * unknown relocation type.
 */
int
apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
unsigned int relindex, struct module *module)
{
Elf32_Shdr *symsec = sechdrs + symindex;
Elf32_Shdr *relsec = sechdrs + relindex;
/* Section whose contents get patched. */
Elf32_Shdr *dstsec = sechdrs + relsec->sh_info;
Elf32_Rel *rel = (void *)relsec->sh_addr;
unsigned int i;
/* One iteration per relocation entry. */
for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rel); i++, rel++) {
unsigned long loc;
Elf32_Sym *sym;
const char *symname;
s32 offset;
#ifdef CONFIG_THUMB2_KERNEL
u32 upper, lower, sign, j1, j2;
#endif
/*
 * 'offset' first holds the symbol index of this entry; further down it is
 * reused for the displacement / immediate being computed.
 */
offset = ELF32_R_SYM(rel->r_info);
/*
 * NOTE(review): the test uses '>', so an index equal to the number of
 * symbol entries is accepted; confirm whether '>=' was intended.
 */
if (offset < 0 || offset > (symsec->sh_size / sizeof(Elf32_Sym))) {
pr_err("%s: section %u reloc %u: bad relocation sym offset\n",
module->name, relindex, i);
return -ENOEXEC;
}
sym = ((Elf32_Sym *)symsec->sh_addr) + offset;
symname = strtab + sym->st_name;
/*
 * The patched 32-bit word must lie inside the target section.
 * NOTE(review): if r_offset is an unsigned type the '< 0' half can never
 * be true - confirm.
 */
if (rel->r_offset < 0 || rel->r_offset > dstsec->sh_size - sizeof(u32)) {
pr_err("%s: section %u reloc %u sym '%s': out of bounds relocation, offset %d size %u\n",
module->name, relindex, i, symname,
rel->r_offset, dstsec->sh_size);
return -ENOEXEC;
}
/* Address of the word to patch. */
loc = dstsec->sh_addr + rel->r_offset;
switch (ELF32_R_TYPE(rel->r_info)) {
case R_ARM_NONE:
/* ignore */
break;
case R_ARM_ABS32:
/* Add the symbol value to the word already stored at loc. */
*(u32 *)loc += sym->st_value;
break;
case R_ARM_PC24:
case R_ARM_CALL:
case R_ARM_JUMP24:
/*
 * Take the low 24 bits of the instruction as a word count, turn it into
 * a byte offset and sign-extend it from bit 25.
 */
offset = (*(u32 *)loc & 0x00ffffff) << 2;
if (offset & 0x02000000)
offset -= 0x04000000;
/* Make it relative to the patched location. */
offset += sym->st_value - loc;
/*
 * Reject displacements that are not a multiple of 4 or that fall outside
 * the open range (-0x02000000, +0x02000000).
 */
if (offset & 3 ||
offset <= (s32)0xfe000000 ||
offset >= (s32)0x02000000) {
pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
module->name, relindex, i, symname,
ELF32_R_TYPE(rel->r_info), loc,
sym->st_value);
return -ENOEXEC;
}
/* Store the word offset back into the low 24 bits, keeping the top byte. */
offset >>= 2;
*(u32 *)loc &= 0xff000000;
*(u32 *)loc |= offset & 0x00ffffff;
break;
case R_ARM_V4BX:
/* Preserve Rm and the condition code. Alter
* other bits to re-code instruction as
* MOV PC,Rm.
*/
*(u32 *)loc &= 0xf000000f;
*(u32 *)loc |= 0x01a0f000;
break;
case R_ARM_PREL31:
/* Existing word + symbol value - loc, truncated to 31 bits. */
offset = *(u32 *)loc + sym->st_value - loc;
*(u32 *)loc = offset & 0x7fffffff;
break;
case R_ARM_MOVW_ABS_NC:
case R_ARM_MOVT_ABS:
/*
 * Gather the 16-bit immediate from instruction bits [19:16] and [11:0],
 * sign-extend it from 16 bits, then add the symbol value.
 */
offset = *(u32 *)loc;
offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff);
offset = (offset ^ 0x8000) - 0x8000;
offset += sym->st_value;
/* The MOVT variant encodes the upper 16 bits of the result. */
if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS)
offset >>= 16;
/* Scatter the low 16 bits back into the same two fields. */
*(u32 *)loc &= 0xfff0f000;
*(u32 *)loc |= ((offset & 0xf000) << 4) |
(offset & 0x0fff);
break;
#ifdef CONFIG_THUMB2_KERNEL
case R_ARM_THM_CALL:
case R_ARM_THM_JUMP24:
/* The instruction is read as two consecutive 16-bit halves. */
upper = *(u16 *)loc;
lower = *(u16 *)(loc + 2);
/*
* 25 bit signed address range (Thumb-2 BL and B.W
* instructions):
* S:I1:I2:imm10:imm11:0
* where:
* S = upper[10] = offset[24]
* I1 = ~(J1 ^ S) = offset[23]
* I2 = ~(J2 ^ S) = offset[22]
* imm10 = upper[9:0] = offset[21:12]
* imm11 = lower[10:0] = offset[11:1]
* J1 = lower[13]
* J2 = lower[11]
*/
sign = (upper >> 10) & 1;
j1 = (lower >> 13) & 1;
j2 = (lower >> 11) & 1;
offset = (sign << 24) | ((~(j1 ^ sign) & 1) << 23) |
((~(j2 ^ sign) & 1) << 22) |
((upper & 0x03ff) << 12) |
((lower & 0x07ff) << 1);
/* Sign-extend from bit 24, then make it relative to loc. */
if (offset & 0x01000000)
offset -= 0x02000000;
offset += sym->st_value - loc;
/*
* For function symbols, only Thumb addresses are
* allowed (no interworking).
*
* For non-function symbols, the destination
* has no specific ARM/Thumb disposition, so
* the branch is resolved under the assumption
* that interworking is not required.
*/
if ((ELF32_ST_TYPE(sym->st_info) == STT_FUNC &&
!(offset & 1)) ||
offset <= (s32)0xff000000 ||
offset >= (s32)0x01000000) {
pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
module->name, relindex, i, symname,
ELF32_R_TYPE(rel->r_info), loc,
sym->st_value);
return -ENOEXEC;
}
/* Re-encode S, J1, J2, imm10 and imm11 from the final offset. */
sign = (offset >> 24) & 1;
j1 = sign ^ (~(offset >> 23) & 1);
j2 = sign ^ (~(offset >> 22) & 1);
*(u16 *)loc = (u16)((upper & 0xf800) | (sign << 10) |
((offset >> 12) & 0x03ff));
*(u16 *)(loc + 2) = (u16)((lower & 0xd000) |
(j1 << 13) | (j2 << 11) |
((offset >> 1) & 0x07ff));
break;
case R_ARM_THM_MOVW_ABS_NC:
case R_ARM_THM_MOVT_ABS:
upper = *(u16 *)loc;
lower = *(u16 *)(loc + 2);
/*
* MOVT/MOVW instructions encoding in Thumb-2:
*
* i = upper[10]
* imm4 = upper[3:0]
* imm3 = lower[14:12]
* imm8 = lower[7:0]
*
* imm16 = imm4:i:imm3:imm8
*/
offset = ((upper & 0x000f) << 12) |
((upper & 0x0400) << 1) |
((lower & 0x7000) >> 4) | (lower & 0x00ff);
/* Sign-extend the 16-bit immediate, then add the symbol value. */
offset = (offset ^ 0x8000) - 0x8000;
offset += sym->st_value;
/* The MOVT variant encodes the upper 16 bits of the result. */
if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS)
offset >>= 16;
/* Write imm4:i into the first half and imm3:imm8 into the second. */
*(u16 *)loc = (u16)((upper & 0xfbf0) |
((offset & 0xf000) >> 12) |
((offset & 0x0800) >> 1));
*(u16 *)(loc + 2) = (u16)((lower & 0x8f00) |
((offset & 0x0700) << 4) |
(offset & 0x00ff));
break;
#endif
default:
/* Any relocation type not handled above aborts the load. */
printk(KERN_ERR "%s: unknown relocation: %u\n",
module->name, ELF32_R_TYPE(rel->r_info));
return -ENOEXEC;
}
}
return 0;
}
這個函數所做的大概工作就是遍歷重定位表項,對每個重定位項,找到重定位入口地址,再根據符號表得到符號的絕對虛擬地址,再根據重定位入口的類型,進行對應的地址修正。總之,重定位完成後,代碼中對符號的引用,都將會被修正爲符號在內核的正確地址。
這裏有一點要注意下,前面不是提到.gnu.linkonce.this_module段嗎?這個段也有一個自己的重定位表,叫.rel.gnu.linkonce.this_module,這個重定位表裏只有兩個重定位表項,還記得前面提到的struct module結構體嗎?struct module結構體的定義在下面給出。module結構內有兩個成員,init和exit(其中exit成員只在配置了CONFIG_MODULE_UNLOAD時才存在)。這兩個成員存放着模塊的__init函數和__exit函數的指針,.rel.gnu.linkonce.this_module重定位表中的兩個重定位項就分別對應着.gnu.linkonce.this_module段中的這兩個指針!
也就是說,這兩個指針的值也會被apply_relocate()函數重定位,重定位這兩個指針有什麼用呢?因爲後面將會用這兩個指針,調用模塊的__init函數和__exit函數。
/*
 * Per-module descriptor.  As described in the surrounding text, the
 * .gnu.linkonce.this_module section of a .ko file holds one instance of
 * this structure, so the order and presence of the fields below (including
 * the #ifdef'd ones) determine where 'init' and 'exit' sit in that section.
 */
struct module
{
/* Lifecycle state; sys_init_module() sets it to MODULE_STATE_GOING or MODULE_STATE_LIVE. */
enum module_state state;
/* Member of list of modules */
struct list_head list;
/* Unique handle for this module */
char name[MODULE_NAME_LEN];
/* Sysfs stuff. */
struct module_kobject mkobj;
struct module_attribute *modinfo_attrs;
const char *version;
const char *srcversion;
struct kobject *holders_dir;
/* Exported symbols */
const struct kernel_symbol *syms;
const unsigned long *crcs;
unsigned int num_syms;
/* Kernel parameters. */
struct kernel_param *kp;
unsigned int num_kp;
/* GPL-only exported symbols. */
unsigned int num_gpl_syms;
const struct kernel_symbol *gpl_syms;
const unsigned long *gpl_crcs;
#ifdef CONFIG_UNUSED_SYMBOLS
/* unused exported symbols. */
const struct kernel_symbol *unused_syms;
const unsigned long *unused_crcs;
unsigned int num_unused_syms;
/* GPL-only, unused exported symbols. */
unsigned int num_unused_gpl_syms;
const struct kernel_symbol *unused_gpl_syms;
const unsigned long *unused_gpl_crcs;
#endif
/* symbols that will be GPL-only in the near future. */
const struct kernel_symbol *gpl_future_syms;
const unsigned long *gpl_future_crcs;
unsigned int num_gpl_future_syms;
/* Exception table */
unsigned int num_exentries;
struct exception_table_entry *extable;
/*
 * Startup function.  sys_init_module() passes it to do_one_initcall()
 * when it is non-NULL.
 */
int (*init)(void);
/* If this is non-NULL, vfree after init() returns */
void *module_init;
/* Here is the actual code + data, vfree'd on unload. */
void *module_core;
/* Here are the sizes of the init and core sections */
unsigned int init_size, core_size;
/* The size of the executable code in each section. */
unsigned int init_text_size, core_text_size;
/* Size of RO sections of the module (text+rodata) */
unsigned int init_ro_size, core_ro_size;
/* Arch-specific module values */
struct mod_arch_specific arch;
unsigned int taints; /* same bits as kernel:tainted */
#ifdef CONFIG_GENERIC_BUG
/* Support for BUG */
unsigned num_bugs;
struct list_head bug_list;
struct bug_entry *bug_table;
#endif
#ifdef CONFIG_KALLSYMS
/*
* We keep the symbol and string tables for kallsyms.
* The core_* fields below are temporary, loader-only (they
* could really be discarded after module init).
*/
Elf_Sym *symtab, *core_symtab;
unsigned int num_symtab, core_num_syms;
char *strtab, *core_strtab;
/* Section attributes */
struct module_sect_attrs *sect_attrs;
/* Notes attributes */
struct module_notes_attrs *notes_attrs;
#endif
/* The command line arguments (may be mangled). People like
keeping pointers to this stuff */
char *args;
#ifdef CONFIG_SMP
/* Per-cpu data. */
void __percpu *percpu;
unsigned int percpu_size;
#endif
#ifdef CONFIG_TRACEPOINTS
/* Tracepoint pointer table and its entry count. */
unsigned int num_tracepoints;
struct tracepoint * const *tracepoints_ptrs;
#endif
#ifdef HAVE_JUMP_LABEL
/* Jump-label entry table and its entry count. */
struct jump_entry *jump_entries;
unsigned int num_jump_entries;
#endif
#ifdef CONFIG_TRACING
unsigned int num_trace_bprintk_fmt;
const char **trace_bprintk_fmt_start;
#endif
#ifdef CONFIG_EVENT_TRACING
struct ftrace_event_call **trace_events;
unsigned int num_trace_events;
#endif
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
unsigned int num_ftrace_callsites;
unsigned long *ftrace_callsites;
#endif
#ifdef CONFIG_MODULE_UNLOAD
/* What modules depend on me? */
struct list_head source_list;
/* What modules do I depend on? */
struct list_head target_list;
/* Who is waiting for us to be unloaded */
struct task_struct *waiter;
/* Destruction function.  Only present when CONFIG_MODULE_UNLOAD is set. */
void (*exit)(void);
struct module_ref __percpu *refptr;
#endif
#ifdef CONFIG_CONSTRUCTORS
/* Constructor functions. */
ctor_fn_t *ctors;
unsigned int num_ctors;
#endif
};
繼續一路返回到load_module()函數,load_module()後面的代碼不看,一路返回到sys_init_module(),在sys_init_module()中,後面會調用do_one_initcall(),參數就是被重定位過的init指針。
if (mod->init != NULL)
ret = do_one_initcall(mod->init);
這個do_one_initcall()函數會調用這個init指針所指向的函數,至此,我們模塊的__init函數就被調用了。
(__init函數是指用__init前綴定義的函數,__exit函數是指用__exit前綴定義的函數,前面提過。)