看开源代码如何解析ELF文件

工具ROPgadget

ROPgadget能够识别并解析多种可执行文件格式,这次主要借助这部分代码来分析ELF文件格式。

分析的文件为libc.so

上代码

class Binary:
    """Read a binary from disk and wrap it in the parser matching its format.

    The parser is chosen from the raw-mode options when both are set,
    otherwise from the magic bytes at the start of the file.
    """

    def __init__(self, options):
        """Load ``options.binary`` and select a format parser for it.

        Args:
            options: object exposing ``binary`` (path of the file to load),
                ``rawArch`` and ``rawMode`` (both truthy to force raw parsing).

        On failure an error line is printed and the instance is left with
        its parser attribute set to ``None``; no exception is raised for an
        unreadable file or an unknown format.
        """
        self.__fileName  = options.binary
        self.__rawBinary = None
        self.__binary    = None

        try:
            # The context manager closes the descriptor even if read() raises.
            with open(self.__fileName, "rb") as fd:
                self.__rawBinary = fd.read()
        except (IOError, OSError, TypeError, ValueError):
            # TypeError/ValueError cover a missing or malformed path value.
            # KeyboardInterrupt and SystemExit are deliberately not caught.
            print("[Error] Can't open the binary or binary not found")
            return

        magic = self.__rawBinary[:4]

        if options.rawArch and options.rawMode:
            self.__binary = Raw(self.__rawBinary, options.rawArch, options.rawMode)
        elif magic == unhexlify(b"7f454c46"):
            self.__binary = ELF(self.__rawBinary)
        elif magic[:2] == unhexlify(b"4d5a"):
            self.__binary = PE(self.__rawBinary)
        elif magic == unhexlify(b"cafebabe"):
            self.__binary = UNIVERSAL(self.__rawBinary)
        elif magic in (unhexlify(b"cefaedfe"), unhexlify(b"cffaedfe")):
            self.__binary = MACHO(self.__rawBinary)
        else:
            print("[Error] Binary format not supported")
            return
在binary.py中进行文件类型判定,ELF文件最开始四个字节‘7f454c46’

class ELFFlags:
    """Numeric constants used when decoding an ELF header.

    Grouped by the header field they relate to: indexes into ``e_ident``,
    the values compared against those two bytes, and the ``e_machine``
    values the loader recognises.
    """

    # Indexes into e_ident.
    EI_CLASS = 4
    EI_DATA = 5

    # Values compared against e_ident[EI_CLASS].
    ELFCLASS32 = 1
    ELFCLASS64 = 2

    # Values compared against e_ident[EI_DATA].
    ELFDATA2LSB = 1
    ELFDATA2MSB = 2

    # Values compared against e_machine, in ascending order.
    EM_386 = 3
    EM_MIPS = 8
    EM_SPARCv8p = 18
    EM_PowerPC = 20
    EM_ARM = 40
    EM_X86_64 = 62
    EM_ARM64 = 183

class ELF:
    """In-memory view of an ELF image: file header, section and program headers."""

    def __init__(self, binary):
        """Copy *binary* into a bytearray and parse its header tables.

        Args:
            binary: raw file contents, anything ``bytearray()`` accepts.
        """
        self.__ElfHeader = None
        self.__shdr_l, self.__phdr_l = [], []
        self.__binary = bytearray(binary)

        # Order matters: the file header is parsed first because the two
        # table parsers read their offsets and entry counts from it.
        for parse_step in (self.__setHeaderElf, self.__setShdr, self.__setPhdr):
            parse_step()
ELF文件类初始化

    def __setHeaderElf(self):
        e_ident = self.__binary[:15]#ELF文件魔数

        ei_class = e_ident[ELFFlags.EI_CLASS]
        ei_data  = e_ident[ELFFlags.EI_DATA]

        if ei_class != ELFFlags.ELFCLASS32 and ei_class != ELFFlags.ELFCLASS64:
            print("[Error] ELF.__setHeaderElf() - Bad Arch size")
            return None

        if ei_data != ELFFlags.ELFDATA2LSB and ei_data != ELFFlags.ELFDATA2MSB:
            print("[Error] ELF.__setHeaderElf() - Bad architecture endian")
            return None

        if ei_class == ELFFlags.ELFCLASS32: 
            if ei_data == ELFFlags.ELFDATA2LSB:
                self.__ElfHeader = Elf32_Ehdr_LSB.from_buffer_copy(self.__binary)
            elif ei_data == ELFFlags.ELFDATA2MSB:
                self.__ElfHeader = Elf32_Ehdr_MSB.from_buffer_copy(self.__binary)
        elif ei_class == ELFFlags.ELFCLASS64: 
            if ei_data == ELFFlags.ELFDATA2LSB:
                self.__ElfHeader = Elf64_Ehdr_LSB.from_buffer_copy(self.__binary)
            elif ei_data == ELFFlags.ELFDATA2MSB:
                self.__ElfHeader = Elf64_Ehdr_MSB.from_buffer_copy(self.__binary)

        self.getArch() # Check if architecture is supported
设置ELF文件头

ELF前16个字节称为魔数

其中前四字节之前已经说过了,第一个字节是ASCII字符中的DEL控制符(0x7f),后三个字节是“ELF”三个字母的ASCII码

第5个字节为Class位,0为无效文件,1为32位文件,2为64位文件

第6个字节指定字节序(Data)有以下取值

0 无效格式

1 小端格式

2 大端格式

根据5和6字节信息选择相应的拷贝方式(如32位小端等)

    def getArch(self):
        """Map the header's ``e_machine`` value to a ``CS_ARCH_*`` constant.

        Returns:
            The matching ``CS_ARCH_*`` constant, or None after printing an
            error when the machine type is not one of the handled values.
        """
        machine = self.__ElfHeader.e_machine

        # 32-bit and 64-bit x86 share one architecture constant.
        if machine in (ELFFlags.EM_386, ELFFlags.EM_X86_64):
            return CS_ARCH_X86
        if machine == ELFFlags.EM_ARM:
            return CS_ARCH_ARM
        if machine == ELFFlags.EM_ARM64:
            return CS_ARCH_ARM64
        if machine == ELFFlags.EM_MIPS:
            return CS_ARCH_MIPS
        if machine == ELFFlags.EM_PowerPC:
            return CS_ARCH_PPC
        if machine == ELFFlags.EM_SPARCv8p:
            return CS_ARCH_SPARC

        print("[Error] ELF.getArch() - Architecture not supported")
        return None
e_machine是一个双字节(19,20字节)的表示CPU平台属性的成员

之后执行函数

    def __setShdr(self):<span style="white-space:pre">	</span>#设置段头部
        shdr_num = self.__ElfHeader.e_shnum    #段数量
        base = self.__binary[self.__ElfHeader.e_shoff:]#获取段表
        shdr_l = []

        e_ident = self.__binary[:15]
        ei_data = e_ident[ELFFlags.EI_DATA]

        for i in range(shdr_num):

            if self.getArchMode() == CS_MODE_32:
                if   ei_data == ELFFlags.ELFDATA2LSB: shdr = Elf32_Shdr_LSB.from_buffer_copy(base)#32位小端格式拷贝
                elif ei_data == ELFFlags.ELFDATA2MSB: shdr = Elf32_Shdr_MSB.from_buffer_copy(base)
            elif self.getArchMode() == CS_MODE_64:
                if   ei_data == ELFFlags.ELFDATA2LSB: shdr = Elf64_Shdr_LSB.from_buffer_copy(base)
                elif ei_data == ELFFlags.ELFDATA2MSB: shdr = Elf64_Shdr_MSB.from_buffer_copy(base)

            self.__shdr_l.append(shdr)
            base = base[self.__ElfHeader.e_shentsize:]

        # setup name from the strings table
        if self.__ElfHeader.e_shstrndx != 0:
            string_table = str(self.__binary[(self.__shdr_l[self.__ElfHeader.e_shstrndx].sh_offset):])
            for i in range(shdr_num):
                self.__shdr_l[i].str_name = string_table[self.__shdr_l[i].sh_name:].split('\0')[0]
该函数处理段头部

第一行self.__ElfHeader.e_shnum表示段数量,e_shnum也是一个双字节成员(49,50字节),在本机的实际运行中可以看到libc.so的段数量为32(好TM多)

第二行self.__ElfHeader.e_shoff代表段表在文件中的偏移,32位版本中为4字节(33,34,35,36字节),实际运行值为0x4b88f

之后同样按照32位小端格式从段表中拷贝添加到self.__shdr_l

self.__ElfHeader.e_shentsize指段表描述符大小,双字节(47,48字节),实际运行值40

self.__ElfHeader.e_shstrndx指段表字符串表所在段在段表中的下标,双字节(51,52字节),实际运行值为31,也就是说段表中最后一个段是段表字符串表所在段(好拗口)

根据这个值找到字符串所在段,然后依次分给各段


下一个函数

def __setPhdr(self):
        """Parse the program header table into ``self.__phdr_l``.

        Reads ``e_phnum`` entries of ``e_phentsize`` bytes starting at file
        offset ``e_phoff`` and appends one structure per entry.

        NOTE(review): this reads like a method of the ELF class whose ``def``
        line lost its indentation -- confirm before relying on it.

        Returns:
            None. Nothing is appended when the header reports no segments.
        """
        header = self.__ElfHeader
        phdr_num = header.e_phnum
        if phdr_num == 0:
            return None

        ei_data = self.__binary[ELFFlags.EI_DATA]
        arch_mode = self.getArchMode()

        # The structure type is the same for every entry, so choose it once.
        phdr_type = None
        if arch_mode == CS_MODE_32:
            if   ei_data == ELFFlags.ELFDATA2LSB: phdr_type = Elf32_Phdr_LSB
            elif ei_data == ELFFlags.ELFDATA2MSB: phdr_type = Elf32_Phdr_MSB
        elif arch_mode == CS_MODE_64:
            if   ei_data == ELFFlags.ELFDATA2LSB: phdr_type = Elf64_Phdr_LSB
            elif ei_data == ELFFlags.ELFDATA2MSB: phdr_type = Elf64_Phdr_MSB

        if phdr_type is None:
            print("[Error] ELF.__setPhdr() - Unsupported class or endianness")
            return None

        # Copy each entry straight from its offset instead of re-slicing the
        # remainder of the file on every iteration.
        offset = header.e_phoff
        for _ in range(phdr_num):
            self.__phdr_l.append(phdr_type.from_buffer_copy(self.__binary, offset))
            offset += header.e_phentsize
self.__ElfHeader.e_phnum是ELF执行视图中Segment的个数,双字节(45,46),实际结果9

self.__ElfHeader.e_phoff是程序头表(Segment表)在文件中的偏移,32位版本中为4字节(29,30,31,32字节);每个表项的大小由双字节成员e_phentsize(43,44字节)给出

之后和段表一样,逐项解析后放入self.__phdr_l










發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章