本文用于记录对QEMU对ELF文件加载函数进行分析。根据“函数使用->函数定义->函数实现->函数实现的分析”的顺序进行分析,最终提取出ELF文件加载的代码。

1. load_elf

mips malta中,对load_elf的使用如下:

    if (load_elf(loaderparams.kernel_filename, cpu_mips_kseg0_to_phys, NULL,
(uint64_t *)&kernel_entry, NULL, (uint64_t *)&kernel_high,
big_endian, ELF_MACHINE, ) < ) {
fprintf(stderr, "qemu: could not load kernel '%s'\n",
loaderparams.kernel_filename);
exit();
}

load_elf在头文件include/hw/loader.h中,函数定义如下:

int load_elf(const char *filename, uint64_t (*translate_fn)(void *, uint64_t),
void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
uint64_t *highaddr, int big_endian, int elf_machine,
int clear_lsb);

load_elf的具体实现在hw/core/loader.c中,函数体如下:

int load_elf(const char *filename, uint64_t (*translate_fn)(void *, uint64_t),
void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr,
uint64_t *highaddr, int big_endian, int elf_machine, int clear_lsb)
{
int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED;
uint8_t e_ident[EI_NIDENT]; fd = open(filename, O_RDONLY | O_BINARY);
if (fd < ) {
perror(filename);
return -;
}
if (read(fd, e_ident, sizeof(e_ident)) != sizeof(e_ident))
goto fail;
if (e_ident[] != ELFMAG0 ||
e_ident[] != ELFMAG1 ||
e_ident[] != ELFMAG2 ||
e_ident[] != ELFMAG3) {
ret = ELF_LOAD_NOT_ELF;
goto fail;
}
#ifdef HOST_WORDS_BIGENDIAN
data_order = ELFDATA2MSB;
#else
data_order = ELFDATA2LSB;
#endif
must_swab = data_order != e_ident[EI_DATA];
if (big_endian) {
target_data_order = ELFDATA2MSB;
} else {
target_data_order = ELFDATA2LSB;
} if (target_data_order != e_ident[EI_DATA]) {
ret = ELF_LOAD_WRONG_ENDIAN;
goto fail;
} lseek(fd, , SEEK_SET);
if (e_ident[EI_CLASS] == ELFCLASS64) {
ret = load_elf64(filename, fd, translate_fn, translate_opaque, must_swab,
pentry, lowaddr, highaddr, elf_machine, clear_lsb);
} else {
ret = load_elf32(filename, fd, translate_fn, translate_opaque, must_swab,
pentry, lowaddr, highaddr, elf_machine, clear_lsb);
} fail:
close(fd);
return ret;
}

在load_elf中,对elf文件进行读取分析的核心函数为load_elf64和load_elf32,下面将把它们进行展开。

2、load_elf64/load_elf32

load_elf64和load_elf32是通过glue(load_elf, SZ)来进行定义的,所在文件include/hw/elf_ops.h,具体函数如下:

static int glue(load_elf, SZ)(const char *name, int fd,
uint64_t (*translate_fn)(void *, uint64_t),
void *translate_opaque,
int must_swab, uint64_t *pentry,
uint64_t *lowaddr, uint64_t *highaddr,
int elf_machine, int clear_lsb)
{
struct elfhdr ehdr;
struct elf_phdr *phdr = NULL, *ph;
int size, i, total_size;
elf_word mem_size, file_size;
uint64_t addr, low = (uint64_t)-, high = ;
uint8_t *data = NULL;
char label[];
int ret = ELF_LOAD_FAILED; if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr))
goto fail;
if (must_swab) {
glue(bswap_ehdr, SZ)(&ehdr);
} switch (elf_machine) {
case EM_PPC64:
if (EM_PPC64 != ehdr.e_machine)
if (EM_PPC != ehdr.e_machine) {
ret = ELF_LOAD_WRONG_ARCH;
goto fail;
}
break;
case EM_X86_64:
if (EM_X86_64 != ehdr.e_machine)
if (EM_386 != ehdr.e_machine) {
ret = ELF_LOAD_WRONG_ARCH;
goto fail;
}
break;
case EM_MICROBLAZE:
if (EM_MICROBLAZE != ehdr.e_machine)
if (EM_MICROBLAZE_OLD != ehdr.e_machine) {
ret = ELF_LOAD_WRONG_ARCH;
goto fail;
}
break;
default:
if (elf_machine != ehdr.e_machine) {
ret = ELF_LOAD_WRONG_ARCH;
goto fail;
}
} if (pentry)
*pentry = (uint64_t)(elf_sword)ehdr.e_entry; glue(load_symbols, SZ)(&ehdr, fd, must_swab, clear_lsb); size = ehdr.e_phnum * sizeof(phdr[]);
if (lseek(fd, ehdr.e_phoff, SEEK_SET) != ehdr.e_phoff) {
goto fail;
}
phdr = g_malloc0(size);
if (!phdr)
goto fail;
if (read(fd, phdr, size) != size)
goto fail;
if (must_swab) {
for(i = ; i < ehdr.e_phnum; i++) {
ph = &phdr[i];
glue(bswap_phdr, SZ)(ph);
}
} total_size = ;
for(i = ; i < ehdr.e_phnum; i++) {
ph = &phdr[i];
if (ph->p_type == PT_LOAD) {
mem_size = ph->p_memsz; /* Size of the ROM */
file_size = ph->p_filesz; /* Size of the allocated data */
data = g_malloc0(file_size);
if (ph->p_filesz > ) {
if (lseek(fd, ph->p_offset, SEEK_SET) < ) {
goto fail;
}
if (read(fd, data, file_size) != file_size) {
goto fail;
}
}
/* address_offset is hack for kernel images that are
linked at the wrong physical address. */
if (translate_fn) {
addr = translate_fn(translate_opaque, ph->p_paddr);
glue(elf_reloc, SZ)(&ehdr, fd, must_swab, translate_fn,
translate_opaque, data, ph, elf_machine);
} else {
addr = ph->p_paddr;
} /* the entry pointer in the ELF header is a virtual
* address, if the text segments paddr and vaddr differ
* we need to adjust the entry */
if (pentry && !translate_fn &&
ph->p_vaddr != ph->p_paddr &&
ehdr.e_entry >= ph->p_vaddr &&
ehdr.e_entry < ph->p_vaddr + ph->p_filesz &&
ph->p_flags & PF_X) {
*pentry = ehdr.e_entry - ph->p_vaddr + ph->p_paddr;
} snprintf(label, sizeof(label), "phdr #%d: %s", i, name); /* rom_add_elf_program() seize the ownership of 'data' */
rom_add_elf_program(label, data, file_size, mem_size, addr); total_size += mem_size;
if (addr < low)
low = addr;
if ((addr + mem_size) > high)
high = addr + mem_size; data = NULL;
}
}
g_free(phdr);
if (lowaddr)
*lowaddr = (uint64_t)(elf_sword)low;
if (highaddr)
*highaddr = (uint64_t)(elf_sword)high;
return total_size;
fail:
g_free(data);
g_free(phdr);
return ret;
}

其中,glue在文件include/qemu/compiler.h中,定义如下:

#ifndef glue
#define xglue(x, y) x ## y
#define glue(x, y) xglue(x, y)
#define stringify(s) tostring(s)
#define tostring(s) #s
#endif

根据定义,我们可以知道,“glue(load_elf, SZ)”经过展开,会变成“load_elfSZ”,当SZ为32和64的时候,结果就是load_elf32和load_elf64了。

在文件hw/core/loader.c中,

#define SZ      32
#define elf_word uint32_t
#define elf_sword int32_t
#define bswapSZs bswap32s
#include "hw/elf_ops.h" #undef elfhdr
#undef elf_phdr
#undef elf_shdr
#undef elf_sym
#undef elf_rela
#undef elf_note
#undef elf_word
#undef elf_sword
#undef bswapSZs
#undef SZ
#define elfhdr elf64_hdr
#define elf_phdr elf64_phdr
#define elf_note elf64_note
#define elf_shdr elf64_shdr
#define elf_sym elf64_sym
#define elf_rela elf64_rela
#define elf_word uint64_t
#define elf_sword int64_t
#define bswapSZs bswap64s
#define SZ 64
#include "hw/elf_ops.h"

我们可以看到,loader.c在包涵elf_ops.h的时候,先对SZ等进行了宏定义。通过两次宏定义和包含,就得到了load_elf32和load_elf64。

在glue(load_elf, SZ)中,使用了以下4个函数:

glue(bswap_ehdr, SZ)(&ehdr);

glue(load_symbols, SZ)(&ehdr, fd, must_swab, clear_lsb);

glue(bswap_phdr, SZ)(ph);

glue(elf_reloc, SZ)(&ehdr, fd, must_swab,  translate_fn,
translate_opaque, data, ph, elf_machine);

它们都在文件include/hw/elf_ops.h中。elf_ops.h中的其它函数也会在解析elf文件时用到,所以我们将elf_ops.h全部贴出来

3、 include/hw/elf_ops.h,文件内容如下:

static void glue(bswap_ehdr, SZ)(struct elfhdr *ehdr)
{
bswap16s(&ehdr->e_type); /* Object file type */
bswap16s(&ehdr->e_machine); /* Architecture */
bswap32s(&ehdr->e_version); /* Object file version */
bswapSZs(&ehdr->e_entry); /* Entry point virtual address */
bswapSZs(&ehdr->e_phoff); /* Program header table file offset */
bswapSZs(&ehdr->e_shoff); /* Section header table file offset */
bswap32s(&ehdr->e_flags); /* Processor-specific flags */
bswap16s(&ehdr->e_ehsize); /* ELF header size in bytes */
bswap16s(&ehdr->e_phentsize); /* Program header table entry size */
bswap16s(&ehdr->e_phnum); /* Program header table entry count */
bswap16s(&ehdr->e_shentsize); /* Section header table entry size */
bswap16s(&ehdr->e_shnum); /* Section header table entry count */
bswap16s(&ehdr->e_shstrndx); /* Section header string table index */
} static void glue(bswap_phdr, SZ)(struct elf_phdr *phdr)
{
bswap32s(&phdr->p_type); /* Segment type */
bswapSZs(&phdr->p_offset); /* Segment file offset */
bswapSZs(&phdr->p_vaddr); /* Segment virtual address */
bswapSZs(&phdr->p_paddr); /* Segment physical address */
bswapSZs(&phdr->p_filesz); /* Segment size in file */
bswapSZs(&phdr->p_memsz); /* Segment size in memory */
bswap32s(&phdr->p_flags); /* Segment flags */
bswapSZs(&phdr->p_align); /* Segment alignment */
} static void glue(bswap_shdr, SZ)(struct elf_shdr *shdr)
{
bswap32s(&shdr->sh_name);
bswap32s(&shdr->sh_type);
bswapSZs(&shdr->sh_flags);
bswapSZs(&shdr->sh_addr);
bswapSZs(&shdr->sh_offset);
bswapSZs(&shdr->sh_size);
bswap32s(&shdr->sh_link);
bswap32s(&shdr->sh_info);
bswapSZs(&shdr->sh_addralign);
bswapSZs(&shdr->sh_entsize);
} static void glue(bswap_sym, SZ)(struct elf_sym *sym)
{
bswap32s(&sym->st_name);
bswapSZs(&sym->st_value);
bswapSZs(&sym->st_size);
bswap16s(&sym->st_shndx);
} static void glue(bswap_rela, SZ)(struct elf_rela *rela)
{
bswapSZs(&rela->r_offset);
bswapSZs(&rela->r_info);
bswapSZs((elf_word *)&rela->r_addend);
} static struct elf_shdr *glue(find_section, SZ)(struct elf_shdr *shdr_table,
int n, int type)
{
int i;
for(i=0;i<n;i++) {
if (shdr_table[i].sh_type == type)
return shdr_table + i;
}
return NULL;
} static int glue(symfind, SZ)(const void *s0, const void *s1)
{
hwaddr addr = *(hwaddr *)s0;
struct elf_sym *sym = (struct elf_sym *)s1;
int result = 0;
if (addr < sym->st_value) {
result = -1;
} else if (addr >= sym->st_value + sym->st_size) {
result = 1;
}
return result;
} static const char *glue(lookup_symbol, SZ)(struct syminfo *s,
hwaddr orig_addr)
{
struct elf_sym *syms = glue(s->disas_symtab.elf, SZ);
struct elf_sym *sym; sym = bsearch(&orig_addr, syms, s->disas_num_syms, sizeof(*syms),
glue(symfind, SZ));
if (sym != NULL) {
return s->disas_strtab + sym->st_name;
} return "";
} static int glue(symcmp, SZ)(const void *s0, const void *s1)
{
struct elf_sym *sym0 = (struct elf_sym *)s0;
struct elf_sym *sym1 = (struct elf_sym *)s1;
return (sym0->st_value < sym1->st_value)
? -1
: ((sym0->st_value > sym1->st_value) ? 1 : 0);
} static int glue(load_symbols, SZ)(struct elfhdr *ehdr, int fd, int must_swab,
int clear_lsb)
{
struct elf_shdr *symtab, *strtab, *shdr_table = NULL;
struct elf_sym *syms = NULL;
struct syminfo *s;
int nsyms, i;
char *str = NULL; shdr_table = load_at(fd, ehdr->e_shoff,
sizeof(struct elf_shdr) * ehdr->e_shnum);
if (!shdr_table)
return -1; if (must_swab) {
for (i = 0; i < ehdr->e_shnum; i++) {
glue(bswap_shdr, SZ)(shdr_table + i);
}
} symtab = glue(find_section, SZ)(shdr_table, ehdr->e_shnum, SHT_SYMTAB);
if (!symtab)
goto fail;
syms = load_at(fd, symtab->sh_offset, symtab->sh_size);
if (!syms)
goto fail; nsyms = symtab->sh_size / sizeof(struct elf_sym); i = 0;
while (i < nsyms) {
if (must_swab)
glue(bswap_sym, SZ)(&syms[i]);
/* We are only interested in function symbols.
Throw everything else away. */
if (syms[i].st_shndx == SHN_UNDEF ||
syms[i].st_shndx >= SHN_LORESERVE ||
ELF_ST_TYPE(syms[i].st_info) != STT_FUNC) {
nsyms--;
if (i < nsyms) {
syms[i] = syms[nsyms];
}
continue;
}
if (clear_lsb) {
/* The bottom address bit marks a Thumb or MIPS16 symbol. */
syms[i].st_value &= ~(glue(glue(Elf, SZ), _Addr))1;
}
i++;
}
syms = g_realloc(syms, nsyms * sizeof(*syms)); qsort(syms, nsyms, sizeof(*syms), glue(symcmp, SZ));
for (i = 0; i < nsyms - 1; i++) {
if (syms[i].st_size == 0) {
syms[i].st_size = syms[i + 1].st_value - syms[i].st_value;
}
} /* String table */
if (symtab->sh_link >= ehdr->e_shnum)
goto fail;
strtab = &shdr_table[symtab->sh_link]; str = load_at(fd, strtab->sh_offset, strtab->sh_size);
if (!str)
goto fail; /* Commit */
s = g_malloc0(sizeof(*s));
s->lookup_symbol = glue(lookup_symbol, SZ);
glue(s->disas_symtab.elf, SZ) = syms;
s->disas_num_syms = nsyms;
s->disas_strtab = str;
s->next = syminfos;
syminfos = s;
g_free(shdr_table);
return 0;
fail:
g_free(syms);
g_free(str);
g_free(shdr_table);
return -1;
} static int glue(elf_reloc, SZ)(struct elfhdr *ehdr, int fd, int must_swab,
uint64_t (*translate_fn)(void *, uint64_t),
void *translate_opaque, uint8_t *data,
struct elf_phdr *ph, int elf_machine)
{
struct elf_shdr *reltab, *shdr_table = NULL;
struct elf_rela *rels = NULL;
int nrels, i, ret = -1;
elf_word wordval;
void *addr; shdr_table = load_at(fd, ehdr->e_shoff,
sizeof(struct elf_shdr) * ehdr->e_shnum);
if (!shdr_table) {
return -1;
}
if (must_swab) {
for (i = 0; i < ehdr->e_shnum; i++) {
glue(bswap_shdr, SZ)(&shdr_table[i]);
}
} reltab = glue(find_section, SZ)(shdr_table, ehdr->e_shnum, SHT_RELA);
if (!reltab) {
goto fail;
}
rels = load_at(fd, reltab->sh_offset, reltab->sh_size);
if (!rels) {
goto fail;
}
nrels = reltab->sh_size / sizeof(struct elf_rela); for (i = 0; i < nrels; i++) {
if (must_swab) {
glue(bswap_rela, SZ)(&rels[i]);
}
if (rels[i].r_offset < ph->p_vaddr ||
rels[i].r_offset >= ph->p_vaddr + ph->p_filesz) {
continue;
}
addr = &data[rels[i].r_offset - ph->p_vaddr];
switch (elf_machine) {
case EM_S390:
switch (rels[i].r_info) {
case R_390_RELATIVE:
wordval = *(elf_word *)addr;
if (must_swab) {
bswapSZs(&wordval);
}
wordval = translate_fn(translate_opaque, wordval);
if (must_swab) {
bswapSZs(&wordval);
}
*(elf_word *)addr = wordval;
break;
default:
fprintf(stderr, "Unsupported relocation type %i!\n",
(int)rels[i].r_info);
}
}
} ret = 0;
fail:
g_free(rels);
g_free(shdr_table);
return ret;
} static int glue(load_elf, SZ)(const char *name, int fd,
uint64_t (*translate_fn)(void *, uint64_t),
void *translate_opaque,
int must_swab, uint64_t *pentry,
uint64_t *lowaddr, uint64_t *highaddr,
int elf_machine, int clear_lsb)
{
struct elfhdr ehdr;
struct elf_phdr *phdr = NULL, *ph;
int size, i, total_size;
elf_word mem_size, file_size;
uint64_t addr, low = (uint64_t)-1, high = 0;
uint8_t *data = NULL;
char label[128];
int ret = ELF_LOAD_FAILED; if (read(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr))
goto fail;
if (must_swab) {
glue(bswap_ehdr, SZ)(&ehdr);
} switch (elf_machine) {
case EM_PPC64:
if (EM_PPC64 != ehdr.e_machine)
if (EM_PPC != ehdr.e_machine) {
ret = ELF_LOAD_WRONG_ARCH;
goto fail;
}
break;
case EM_X86_64:
if (EM_X86_64 != ehdr.e_machine)
if (EM_386 != ehdr.e_machine) {
ret = ELF_LOAD_WRONG_ARCH;
goto fail;
}
break;
case EM_MICROBLAZE:
if (EM_MICROBLAZE != ehdr.e_machine)
if (EM_MICROBLAZE_OLD != ehdr.e_machine) {
ret = ELF_LOAD_WRONG_ARCH;
goto fail;
}
break;
default:
if (elf_machine != ehdr.e_machine) {
ret = ELF_LOAD_WRONG_ARCH;
goto fail;
}
} if (pentry)
*pentry = (uint64_t)(elf_sword)ehdr.e_entry; glue(load_symbols, SZ)(&ehdr, fd, must_swab, clear_lsb); size = ehdr.e_phnum * sizeof(phdr[0]);
if (lseek(fd, ehdr.e_phoff, SEEK_SET) != ehdr.e_phoff) {
goto fail;
}
phdr = g_malloc0(size);
if (!phdr)
goto fail;
if (read(fd, phdr, size) != size)
goto fail;
if (must_swab) {
for(i = 0; i < ehdr.e_phnum; i++) {
ph = &phdr[i];
glue(bswap_phdr, SZ)(ph);
}
} total_size = 0;
for(i = 0; i < ehdr.e_phnum; i++) {
ph = &phdr[i];
if (ph->p_type == PT_LOAD) {
mem_size = ph->p_memsz; /* Size of the ROM */
file_size = ph->p_filesz; /* Size of the allocated data */
data = g_malloc0(file_size);
if (ph->p_filesz > 0) {
if (lseek(fd, ph->p_offset, SEEK_SET) < 0) {
goto fail;
}
if (read(fd, data, file_size) != file_size) {
goto fail;
}
}
/* address_offset is hack for kernel images that are
linked at the wrong physical address. */
if (translate_fn) {
addr = translate_fn(translate_opaque, ph->p_paddr);
glue(elf_reloc, SZ)(&ehdr, fd, must_swab, translate_fn,
translate_opaque, data, ph, elf_machine);
} else {
addr = ph->p_paddr;
} /* the entry pointer in the ELF header is a virtual
* address, if the text segments paddr and vaddr differ
* we need to adjust the entry */
if (pentry && !translate_fn &&
ph->p_vaddr != ph->p_paddr &&
ehdr.e_entry >= ph->p_vaddr &&
ehdr.e_entry < ph->p_vaddr + ph->p_filesz &&
ph->p_flags & PF_X) {
*pentry = ehdr.e_entry - ph->p_vaddr + ph->p_paddr;
} snprintf(label, sizeof(label), "phdr #%d: %s", i, name); /* rom_add_elf_program() seize the ownership of 'data' */
rom_add_elf_program(label, data, file_size, mem_size, addr); total_size += mem_size;
if (addr < low)
low = addr;
if ((addr + mem_size) > high)
high = addr + mem_size; data = NULL;
}
}
g_free(phdr);
if (lowaddr)
*lowaddr = (uint64_t)(elf_sword)low;
if (highaddr)
*highaddr = (uint64_t)(elf_sword)high;
return total_size;
fail:
g_free(data);
g_free(phdr);
return ret;
}

其中,bswap16s在文件include/qemu/bswap.h中,其定义如下:

static inline void bswap16s(uint16_t *s)
{
*s = bswap16(*s);
} static inline uint16_t bswap16(uint16_t x)
{
return bswap_16(x);
}

4、load_at

load_at在hw/core/loader.c中,其定义如下

static void *load_at(int fd, off_t offset, size_t size)
{
void *ptr;
if (lseek(fd, offset, SEEK_SET) < 0)
return NULL;
ptr = g_malloc(size);
if (read(fd, ptr, size) != size) {
g_free(ptr);
return NULL;
}
return ptr;
}

5、ELF文件格式:关于ELF文件格式的内容摘自百度百科

在计算机科学中,是一种用于二进制文件、可执行文件、目标代码、共享库和核心转储的标准文件格式。

是UNIX系统实验室(USL)作为应用程序二进制接口(Application Binary Interface,ABI)而开发和发布的,也是Linux的主要可执行文件格式。

1999年,被86open项目选为x86架构上的类Unix操作系统的二进制文件标准格式,用来取代COFF。因其可扩展性与灵活性,也可应用在其它处理器、计算机系统架构的操作系统上。

ELF文件由4部分组成,分别是ELF头(ELF header)、程序头表(Program header table)、节(Section)和节头表(Section header table)。实际上,一个文件中不一定包含全部内容,而且他们的位置也未必如同所示这样安排,只有ELF头的位置是固定的,其余各部分的位置、大小等信息有ELF头中的各项值来决定。

QEMU ELF_LOAER分析[基于MIPS]-LMLPHP

05-11 22:50