我正在编写自己的动态链接器/加载器
我的主要担忧是使BASE ADDRESS正确有效,因为目前它是0(无)
我已经11个月没有从事此工作了,可能需要重新学习我的大多数链接器/加载器以及ELF规范
旨在提供以下功能:
例如,如果它可以用glibc编译但不能用musl编译,那么它就坏了,因此依赖glibc进行编译,如果在基于musl的发行版上进行编译,显然不是一个选择
例如,提供的$ APP_LOCATION是app / bin / app,它将/ lib / blah重定向到app / bin /../ lib / blah,如果该路径不存在,则回退到/ lib / blah
这是在gnu动态链接器中无法实现的,并且无法在ld.so中预加载。
例如:
(由于发布文件将导致超过30,000个字符)
test_loader.c: https://github.com/mgood7123/universal-dynamic-loader/blob/master/loader/test_loader.c
additional_flags="-w"
debug="-g3 -O0 $additional_flags"
share="$debug -fPIC -shared"
void * test = dlopen(“ ./ files / test_lib.so”):gcc $ share test_lib.c test_lib2.c -o files / test_lib.so:https://github.com/mgood7123/universal-dynamic-loader/blob/master/loader/test_lib.c和https://github.com/mgood7123/universal-dynamic-loader/blob/master/loader/test_lib2.c >
void * testCPlusPlus = dlopen(“ ./ files / test ++ _ lib.so”):g ++ $ share test ++ _ lib.cpp test ++ _ lib2.cpp -o files / test ++ _ lib.so:{ {3}}和https://github.com/mgood7123/universal-dynamic-loader/blob/master/loader/test%2B%2B_lib.cpp
到目前为止,该可以为几乎所有库(如编译为共享库的所有库,而不是所有libc变体,例如musl或klib)都适用,除了glibc需要初始化stdin / stdout等变量/ stderr和其他人(通过strlen,write()正常工作,未测试strcmp和其他人,但未执行printf并在执行时挂起
musl似乎没有这个问题,并且它的所有功能似乎都可以正常工作,尽管我没有对它进行广泛的测试,strlen,write,puts和printf在musl中都没有问题
(由于ELF规范似乎将其称为动态加载程序,而gnu则将其称为动态链接器,因此它被正式称为idk)
在动态链接期间,如何获得基地址以初始化共享对象文件
已授予我无法按原样发布链接器的完整代码的信息 257,903个字符,且帖子数限制为 30,000个字符
R_x86_64_RELATIVE重定位需要此基地址,我认为其中包含DT_INIT和DT_INIT_ARRAY函数/函数数组指针
当我试图弄清楚时,我得到了……好吧,我得到了两件事
我的重定位段出错:
R_X86_64_RELATIVE calculation: B + A (base address + r_addend)
library[library_index].base_address = (nil)
reloc->r_offset = (nil)+0x000000200e78=0x000000200e78
reloc->r_addend = (nil)+0x000000000590=0x000000000590
((char**)((char*)library[library_index].base_address + reloc->r_offset)) = 0x200e78
Segmentation fault
出现问题编号2
基址不正确
searching indexes for "./DT_INIT.so" incase it has already been loaded
current index 0 holds "./DT_INIT.so"
index 0 holds desired library "./DT_INIT.so"
map succeded with address: 0x7f132f66a000
calling round_up
returning = 0x7f1320000000
memmove: round_up(0x7f131dbe0010, 0x000010000000)+0x000000200000 = 0x7f1320200000
calling round_up
returning = 0x7f1320000000
dest = 0x7f1320200000
dest = 0x7f1320000000
library[library_index]._elf_program_header[library[library_index].Last_Load_Header_index].p_vaddr = 0x000000200e78
library[library_index]._elf_program_header[lowest_idx].p_paddr = 0x000000200e78
library[library_index]._elf_program_header[lowest_idx].p_vaddr = 0x000000200e78
calling round_down
returning = 0x000000200000
calling round_down
returning = 0x000000200000
calling round_nearest
returning = 0x000000201000
base address range = 0x7f1320000000 - 0x7f1320201028
mapping = 0x7f1320000000
base address = (nil)
通过
完成dlopen(lib) > dlopen_(lib) > init_(lib) > {
> init(lib) // mmaps the lib
> map()
**base address is assigned in the function map(), which aswell
as assigning the base address, also maps the PT_LOADS**
}
int init(char * lib) {
if (library[library_index].struct_init != "initialized") init_struct();
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "current index %d holds \"%s\"\nsearching indexes for \"%s\" incase it has already been loaded\n", library_index, library[library_index].last_lib, lib);
library_index = search(lib);
library[library_index].last_lib = lib;
library[library_index].current_lib = lib;
if (library[library_index].array == NULL) {
int fd = open(lib, O_RDONLY);
if (fd < 0) {
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "cannot open \"%s\", returned %i\n", lib, fd);
return -1;
}
library[library_index].len = 0;
library[library_index].len = lseek(fd, 0, SEEK_END);
lseek(fd, 0, 0);
library[library_index].array = mmap (NULL, library[library_index].len, PROT_READ, MAP_PRIVATE, fd, 0);
if (library[library_index].array == MAP_FAILED) {
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "map failed\n");
exit;
} else {
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "map succeded with address: %014p\n", library[library_index].array);
return 0;
}
} else return 0;
return -1;
}
int prot_from_phdr(const int p_flags)
{
int prot = 0;
if (p_flags & PF_R)
{
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "PROT_READ|");
prot |= PROT_READ;
}
if (p_flags & PF_W)
{
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "PROT_WRITE|");
prot |= PROT_WRITE;
}
if (p_flags & PF_X)
{
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "PROT_EXEC|");
prot |= PROT_EXEC;
}
return prot;
}
uintptr_t round_nearest(uintptr_t value, uintptr_t size)
{
printf("calling %s\n", __func__);
uintptr_t result = 0;
uintptr_t remainder = value % size;
if (remainder < size/2) { result = value - remainder; } else { result = value + size - remainder; }
printf("returning = %014p\n", result);
return result;
}
uintptr_t round_down(uintptr_t value, uintptr_t size)
{
printf("calling %s\n", __func__);
uintptr_t result = 0;
result = (value/size)*size;
printf("returning = %014p\n", result);
return result;
}
uintptr_t round_up(uintptr_t value, uintptr_t size)
{
printf("calling %s\n", __func__);
uintptr_t result = 0;
result = value ? size * ((value + (size - 1)) / size) : size;
printf("returning = %014p\n", result);
return result;
}
// special version specifically for PT_LOAD handling
int read_fast_verifyb(const char *src, int len_of_source, char **dest, int requested_len, Elf64_Phdr PT_LOAD_F, Elf64_Phdr PT_LOAD_L) {
void * align = 0x10000000;
*dest = malloc(requested_len+align+PT_LOAD_L.p_align);
if (len_of_source < requested_len) memcpy(*dest, src, len_of_source);
else memcpy(*dest, src, requested_len);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "memmove: round_up(%014p, %014p)+%014p = %014p\n", *dest, align, PT_LOAD_L.p_align, round_up(*dest, align)+PT_LOAD_L.p_align);
*dest = memmove(round_up(*dest, align)+PT_LOAD_L.p_align, *dest, requested_len);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "dest = %014p\n", *dest);
*dest = memmove(*dest-PT_LOAD_L.p_align, *dest, PT_LOAD_F.p_memsz);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "dest = %014p\n", *dest);
return requested_len;
}
void map() {
if (library[library_index].is_mapped == 0) {
library[library_index]._elf_header = (Elf64_Ehdr *) library[library_index].array;
library[library_index]._elf_program_header = (Elf64_Phdr *)((unsigned long)library[library_index]._elf_header + library[library_index]._elf_header->e_phoff);
/*
the very first thing we do is obtain the base address
Base Address
The virtual addresses in the program headers might not represent the actual virtual addresses
of the program's memory image. Executable files typically contain absolute code. To let the
process execute correctly, the segments must reside at the virtual addresses used to build the
executable file. On the other hand, shared object segments typically contain
position-independent code. This lets a segment's virtual address change from one process to
another, without invalidating execution behavior. Though the system chooses virtual addresses
for individual processes, it maintains the segments’ relative positions. Because
position-independent code uses relative addressing between segments, the difference between
virtual addresses in memory must match the difference between virtual addresses in the file.
The difference between the virtual address of any segment in memory and the corresponding
virtual address in the file is thus a single constant value for any one executable or shared object
in a given process. This difference is the base address. One use of the base address is to relocate
the memory image of the program during dynamic linking.
An executable or shared object file's base address is calculated during execution from three
values: the virtual memory load address, the maximum page size, and the lowest virtual address
of a program's loadable segment. To compute the base address, one determines the memory
address associated with the lowest p_vaddr value for a PT_LOAD segment. This address is
truncated to the nearest multiple of the maximum page size. The corresponding p_vaddr value
itself is also truncated to the nearest multiple of the maximum page size. The base address is
the difference between the truncated memory address and the truncated p_vaddr value.
*/
// aquire the first and last PT_LOAD'S
int PT_LOADS=0;
for (int i = 0; i < library[library_index]._elf_header->e_phnum; ++i) {
switch(library[library_index]._elf_program_header[i].p_type)
{
case PT_LOAD:
// if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "i = %d\n", i);
// if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "PT_LOADS = %d\n", PT_LOADS);
if (!PT_LOADS) {
// if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "saving first load\n");
library[library_index].First_Load_Header_index = i;
}
if (PT_LOADS) {
// if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "saving last load\n");
library[library_index].Last_Load_Header_index = i;
}
PT_LOADS=PT_LOADS+1;
break;
}
}
size_t span = library[library_index]._elf_program_header[library[library_index].Last_Load_Header_index].p_vaddr + library[library_index]._elf_program_header[library[library_index].Last_Load_Header_index].p_memsz - library[library_index]._elf_program_header[library[library_index].First_Load_Header_index].p_vaddr;
read_fast_verifyb(library[library_index].array, library[library_index].len, &library[library_index].mapping_start, span, library[library_index]._elf_program_header[library[library_index].First_Load_Header_index], library[library_index]._elf_program_header[library[library_index].Last_Load_Header_index]);
fprintf(stderr, "library[library_index]._elf_program_header[library[library_index].Last_Load_Header_index].p_vaddr = %014p\n", library[library_index]._elf_program_header[library[library_index].Last_Load_Header_index].p_vaddr);
// aquire the lowest PT_LOAD'S
Elf64_Addr lowest_p_vaddr = 0;
int lowest_idx = -1;
for (int i = 0; i < library[library_index]._elf_header->e_phnum; ++i) {
switch(library[library_index]._elf_program_header[i].p_type)
{
case PT_LOAD:
if (!lowest_p_vaddr) {
lowest_p_vaddr = library[library_index]._elf_program_header[i].p_vaddr;
lowest_idx = i;
}
if (lowest_p_vaddr < library[library_index]._elf_program_header[i].p_memsz) {
lowest_p_vaddr = library[library_index]._elf_program_header[i].p_vaddr;
lowest_idx = i;
}
break;
}
}
size_t pagesize = 0x1000;
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "library[library_index]._elf_program_header[lowest_idx].p_paddr = %014p\nlibrary[library_index]._elf_program_header[lowest_idx].p_vaddr = %014p\n",library[library_index]._elf_program_header[lowest_idx].p_paddr, library[library_index]._elf_program_header[lowest_idx].p_vaddr);
Elf64_Addr truncated_physical_address = round_down(library[library_index]._elf_program_header[lowest_idx].p_paddr, pagesize);
Elf64_Addr truncated_virtual_address = round_down(library[library_index]._elf_program_header[lowest_idx].p_vaddr, pagesize);
library[library_index].base_address = truncated_physical_address - truncated_virtual_address;
// library[library_index].base_address = library[library_index].mapping_start;
library[library_index].align = round_nearest(library[library_index]._elf_program_header[library[library_index].Last_Load_Header_index].p_vaddr, pagesize);
// library[library_index].base_address = library[library_index].mapping_start - library[library_index].align;
library[library_index].mapping_end = library[library_index].mapping_start+span;
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "base address range = %014p - %014p\nmapping = %014p\nbase address = %014p\n", library[library_index].mapping_start, library[library_index].mapping_end, library[library_index].mapping_start, library[library_index].base_address);
// abort_();
// base address aquired, map all PT_LOAD segments adjusting by base address then continue with the rest
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\n\n\nfind %014p, %014p, (int) 1239\n\n\n\n", library[library_index].mapping_start, library[library_index].mapping_end);
if (library[library_index].mapping_start == 0x00000000) abort_();
int PT_LOADS_CURRENT = 0;
for (int i = 0; i < library[library_index]._elf_header->e_phnum; ++i) {
switch(library[library_index]._elf_program_header[i].p_type)
{
case PT_LOAD:
PT_LOADS_CURRENT = PT_LOADS_CURRENT + 1;
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "mapping PT_LOAD number %d\n", PT_LOADS_CURRENT);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\t\tp_flags: %014p\n", library[library_index]._elf_program_header[i].p_flags);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\t\tp_offset: %014p\n", library[library_index]._elf_program_header[i].p_offset);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\t\tp_vaddr: %014p\n", library[library_index]._elf_program_header[i].p_vaddr);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\t\tp_paddr: %014p\n", library[library_index]._elf_program_header[i].p_paddr);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\t\tp_filesz: %014p\n", library[library_index]._elf_program_header[i].p_filesz);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\t\tp_memsz: %014p\n", library[library_index]._elf_program_header[i].p_memsz);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\t\tp_align: %014p\n\n", library[library_index]._elf_program_header[i].p_align);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\tp_flags: %014p", library[library_index]._elf_program_header[i].p_flags);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, " p_offset: %014p", library[library_index]._elf_program_header[i].p_offset);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, " p_vaddr: %014p", library[library_index]._elf_program_header[i].p_vaddr);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, " p_paddr: %014p", library[library_index]._elf_program_header[i].p_paddr);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, " p_filesz: %014p", library[library_index]._elf_program_header[i].p_filesz);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, " p_memsz: %014p", library[library_index]._elf_program_header[i].p_memsz);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, " p_align: %014p\n\n\n", library[library_index]._elf_program_header[i].p_align);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "mprotect(%014p+round_down(%014p, %014p), %014p, ", library[library_index].mapping_start, library[library_index]._elf_program_header[i].p_vaddr, library[library_index]._elf_program_header[i].p_align, library[library_index]._elf_program_header[i].p_memsz);
prot_from_phdr(library[library_index]._elf_program_header[i].p_flags);
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, ");\n");
errno = 0;
int check_mprotect_success = mprotect(library[library_index].mapping_start+round_down(library[library_index]._elf_program_header[i].p_vaddr, library[library_index]._elf_program_header[i].p_align), round_up(library[library_index]._elf_program_header[i].p_memsz, library[library_index]._elf_program_header[i].p_align), library[library_index]._elf_program_header[i].p_flags);
if (errno == 0)
{
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "mprotect on %014p succeded with size: %014p\n", library[library_index].mapping_start+round_down(library[library_index]._elf_program_header[i].p_vaddr, library[library_index]._elf_program_header[i].p_align), round_up(library[library_index]._elf_program_header[i].p_memsz, library[library_index]._elf_program_header[i].p_align));
print_maps();
}
else
{
if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "mprotect failed with: %s (errno: %d, check_mprotect_success = %d)\n", strerror(errno), errno, check_mprotect_success);
print_maps();
abort_();
}
break;
}
}
library[library_index].is_mapped = 1;
}
}
int
init_(const char * filename) {
init(filename);
if (library[library_index].init__ == 1) return 0;
library[library_index]._elf_header = (Elf64_Ehdr *) library[library_index].array;
read_section_header_table_(library[library_index].array, library[library_index]._elf_header, &library[library_index]._elf_symbol_table);
obtain_rela_plt_size(library[library_index].array, library[library_index]._elf_header, library[library_index]._elf_symbol_table);
if(!strncmp((char*)library[library_index]._elf_header->e_ident, "\177ELF", 4)) {
map();
...
}
...
}
void *
dlopen_(const char * cc)
{
if (library[library_index].init_lock == 1) {
if (bytecmpq(ldd_quiet, "no") == 0) fprintf(stderr, "dlopen: LOCKED\n");
return "-1";
};
if ( if_valid(cc) == -1) {
fprintf(stderr, "\"%s\" not found\n", cc);
errno = 0;
return "-1";
}
init_(cc);
...
}
void *
dlopen(const char * cc) {
get_needed(cc);
return dlopen_(cc);
}
这是库的结构[]
struct lib
{
char * rootlib;
char * rootusrlib;
int init_lock;
char * struct_init;
char * library_name;
char ** NEEDED;
int NEEDED_COUNT;
char library_first_character;
char * library_len;
char * library_symbol;
Elf64_Ehdr * _elf_header;
Elf64_Phdr * _elf_program_header;
Elf64_Shdr * _elf_symbol_table;
char *strtab;
size_t len;
char * array;
char * current_lib;
char * last_lib;
int is_mapped;
size_t align;
Elf64_Addr mapping_start;
Elf64_Addr base_address;
Elf64_Addr mapping_end;
int init__;
int PT_DYNAMIC_;
char * tmp99D;
Elf64_Dyn * dynamic;
int First_Load_Header_index;
int Last_Load_Header_index;
size_t RELA_PLT_SIZE;
int _R_X86_64_NONE;
int _R_X86_64_64;
int _R_X86_64_PC32;
int _R_X86_64_GOT32;
int _R_X86_64_PLT32;
int _R_X86_64_COPY;
int _R_X86_64_GLOB_DAT;
int _R_X86_64_JUMP_SLOT;
int _R_X86_64_RELATIVE;
int _R_X86_64_GOTPCREL;
int _R_X86_64_32;
int _R_X86_64_32S;
int _R_X86_64_16;
int _R_X86_64_PC16;
int _R_X86_64_8;
int _R_X86_64_PC8;
int _R_X86_64_DTPMOD64;
int _R_X86_64_DTPOFF64;
int _R_X86_64_TPOFF64;
int _R_X86_64_TLSGD;
int _R_X86_64_TLSLD;
int _R_X86_64_DTPOFF32;
int _R_X86_64_GOTTPOFF;
int _R_X86_64_TPOFF32;
int _R_X86_64_PC64;
int _R_X86_64_GOTOFF64;
int _R_X86_64_GOTPC32;
int _R_X86_64_GOT64;
int _R_X86_64_GOTPCREL64;
int _R_X86_64_GOTPC64;
int _Deprecated1;
int _R_X86_64_PLTOFF64;
int _R_X86_64_SIZE32;
int _R_X86_64_SIZE64;
int _R_X86_64_GOTPC32_TLSDESC;
int _R_X86_64_TLSDESC_CALL;
int _R_X86_64_TLSDESC;
int _R_X86_64_IRELATIVE;
int _R_X86_64_RELATIVE64;
int _Deprecated2;
int _Deprecated3;
int _R_X86_64_GOTPLT64;
int _R_X86_64_GOTPCRELX;
int _R_X86_64_REX_GOTPCRELX;
int _R_X86_64_NUM;
int _R_X86_64_UNKNOWN;
Elf64_Addr * GOT;
Elf64_Addr * GOT2;
Elf64_Addr * PLT;
} library[512];
extern struct lib library[512];
因为我不能完全确定“截断到最接近的倍数”是什么意思,但是因为p_paddr和p_vaddr是相同的,所以当“基地址是截断的内存地址和截断的内存地址之间的差值”时,对每个参数进行的计算都将为0。 p_vaddr值。”除非我不正确地解释了这一点:
An executable or shared object file's base address is calculated during execution from three
values: the virtual memory load address, the maximum page size, and the lowest virtual address
of a program's loadable segment. To compute the base address, one determines the memory
address associated with the lowest p_vaddr value for a PT_LOAD segment. This address is
truncated to the nearest multiple of the maximum page size. The corresponding p_vaddr value
itself is also truncated to the nearest multiple of the maximum page size. The base address is
the difference between the truncated memory address and the truncated p_vaddr value.