在动态链接/加载期间,如何获得基址

时间:2019-02-12 14:04:10

标签: c elf dynamic-loading

我正在编写自己的动态链接器/加载器

我的主要担忧是使BASE ADDRESS正确有效,因为目前它是0(无)

我已经11个月没有从事此工作了,可能需要重新学习我的大多数链接器/加载器以及ELF规范

旨在提供以下功能:

  1. 能够加载和执行任何libc编译的应用程序,包括glibc,musl,uclibc,klibc等,因此必须独立于此类库,

例如,如果它可以用glibc编译但不能用musl编译,那么它就坏了,因此依赖glibc进行编译,如果在基于musl的发行版上进行编译,显然不是一个选择

  1. 能够将共享对象的加载重定向到$ APP_LOCATION /../

例如,提供的$ APP_LOCATION是app / bin / app,它将/ lib / blah重定向到app / bin /../ lib / blah,如果该路径不存在,则回退到/ lib / blah

这是在gnu动态链接器中无法实现的,并且无法在ld.so中预加载。

  1. 能够从任何共享对象中dlopen任何有效的符号,包括本地符号和嵌套符号(如果已知),以及C ++符号(通过libiberty解码/编码),由于dlsym本身的方式,也无法通过钩挂dlsym来完成有效(例如,仅限于全局链接符号)

例如:

(由于发布文件将导致超过30,000个字符)

test_loader.c: https://github.com/mgood7123/universal-dynamic-loader/blob/master/loader/test_loader.c

additional_flags="-w"
debug="-g3 -O0 $additional_flags"
share="$debug -fPIC -shared"

void * test = dlopen(“ ./ files / test_lib.so”):gcc $ share test_lib.c test_lib2.c -o files / test_lib.so:https://github.com/mgood7123/universal-dynamic-loader/blob/master/loader/test_lib.chttps://github.com/mgood7123/universal-dynamic-loader/blob/master/loader/test_lib2.c

void * testCPlusPlus = dlopen(“ ./ files / test ++ _ lib.so”):g ++ $ share test ++ _ lib.cpp test ++ _ lib2.cpp -o files / test ++ _ lib.so:{ {3}}和https://github.com/mgood7123/universal-dynamic-loader/blob/master/loader/test%2B%2B_lib.cpp

到目前为止,该可以为几乎所有库(如编译为共享库的所有库,而不是所有libc变体,例如musl或klib)都适用,除了glibc需要初始化stdin / stdout等变量/ stderr和其他人(通过strlen,write()正常工作,未测试strcmp和其他人,但未执行printf并在执行时挂起

musl似乎没有这个问题,并且它的所有功能似乎都可以正常工作,尽管我没有对它进行广泛的测试,strlen,write,puts和printf在musl中都没有问题

(由于ELF规范似​​乎将其称为动态加载程序,而gnu则将其称为动态链接器,因此它被正式称为idk)

在动态链接期间,如何获得基地址以初始化共享对象文件

已授予我无法按原样发布链接器的完整代码的信息 257,903个字符,且帖子数限制为 30,000个字符

R_x86_64_RELATIVE重定位需要此基地址,我认为其中包含DT_INIT和DT_INIT_ARRAY函数/函数数组指针

当我试图弄清楚时,我得到了……好吧,我得到了两件事

    尝试分配给计算出的基地址+偏移量时,
  1. 我的重定位段出错:

    R_X86_64_RELATIVE            calculation: B + A (base address + r_addend)
    library[library_index].base_address    =          (nil)
    reloc->r_offset =          (nil)+0x000000200e78=0x000000200e78
    reloc->r_addend =          (nil)+0x000000000590=0x000000000590
    ((char**)((char*)library[library_index].base_address + reloc->r_offset)) = 0x200e78
    Segmentation fault
    

出现问题编号2

  1. 基址不正确

    searching indexes for "./DT_INIT.so" incase it has already been loaded
    current index 0 holds "./DT_INIT.so"
    index 0 holds desired library "./DT_INIT.so"
    map succeded with address: 0x7f132f66a000
    calling round_up
    returning = 0x7f1320000000
    memmove: round_up(0x7f131dbe0010, 0x000010000000)+0x000000200000 = 0x7f1320200000
    calling round_up
    returning = 0x7f1320000000
    dest = 0x7f1320200000
    dest = 0x7f1320000000
    library[library_index]._elf_program_header[library[library_index].Last_Load_Header_index].p_vaddr = 0x000000200e78
    library[library_index]._elf_program_header[lowest_idx].p_paddr = 0x000000200e78
    library[library_index]._elf_program_header[lowest_idx].p_vaddr = 0x000000200e78
    calling round_down
    returning = 0x000000200000
    calling round_down
    returning = 0x000000200000
    calling round_nearest
    returning = 0x000000201000
    base address range = 0x7f1320000000 - 0x7f1320201028
    mapping = 0x7f1320000000
    base address =          (nil)
    

通过

完成
dlopen(lib) > dlopen_(lib) > init_(lib) > {
                                            > init(lib) // mmaps the lib
                                            > map()
**base address is assigned in the function map(), which aswell
as assigning the base address, also maps the PT_LOADS**
}

int init(char * lib) {
    if (library[library_index].struct_init != "initialized") init_struct();
    if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "current index %d holds \"%s\"\nsearching indexes for \"%s\" incase it has already been loaded\n", library_index, library[library_index].last_lib, lib);

    library_index = search(lib);
    library[library_index].last_lib = lib;
    library[library_index].current_lib = lib;
    if (library[library_index].array == NULL) {
        int fd = open(lib, O_RDONLY);
        if (fd < 0) {
            if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "cannot open \"%s\", returned %i\n", lib, fd);
            return -1;
        }
        library[library_index].len = 0;
        library[library_index].len = lseek(fd, 0, SEEK_END);
        lseek(fd, 0, 0);
        library[library_index].array = mmap (NULL, library[library_index].len, PROT_READ, MAP_PRIVATE, fd, 0);
        if (library[library_index].array == MAP_FAILED) {
            if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "map failed\n");
            exit;
        } else {
            if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "map succeded with address: %014p\n", library[library_index].array);
            return 0;
        }
    } else return 0;
    return -1;
}

int prot_from_phdr(const int p_flags)
{
    int prot = 0;
    if (p_flags & PF_R)
    {
        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "PROT_READ|");
        prot |= PROT_READ;
    }
    if (p_flags & PF_W)
    {
        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "PROT_WRITE|");
        prot |= PROT_WRITE;
    }
    if (p_flags & PF_X)
    {
        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "PROT_EXEC|");
        prot |= PROT_EXEC;
    }
    return prot;
}

uintptr_t round_nearest(uintptr_t value, uintptr_t size)
{
    printf("calling %s\n", __func__);
    uintptr_t result = 0;
    uintptr_t remainder = value % size;
    if (remainder < size/2) { result = value - remainder; } else { result = value + size - remainder; }
    printf("returning = %014p\n", result);
    return result;
}


uintptr_t round_down(uintptr_t value, uintptr_t size)
{
    printf("calling %s\n", __func__);
    uintptr_t result = 0;
    result = (value/size)*size;
    printf("returning = %014p\n", result);
    return result;
}

uintptr_t round_up(uintptr_t value, uintptr_t size)
{
    printf("calling %s\n", __func__);
    uintptr_t result = 0;
    result = value ? size * ((value + (size - 1)) / size) : size;
    printf("returning = %014p\n", result);
    return result;
}

// special version specifically for PT_LOAD handling
int read_fast_verifyb(const char *src, int len_of_source, char **dest, int requested_len, Elf64_Phdr PT_LOAD_F, Elf64_Phdr PT_LOAD_L) {
    void * align = 0x10000000;
    *dest = malloc(requested_len+align+PT_LOAD_L.p_align);
    if (len_of_source < requested_len) memcpy(*dest, src, len_of_source);
    else memcpy(*dest, src, requested_len);
    if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "memmove: round_up(%014p, %014p)+%014p = %014p\n", *dest, align, PT_LOAD_L.p_align, round_up(*dest, align)+PT_LOAD_L.p_align);
    *dest = memmove(round_up(*dest, align)+PT_LOAD_L.p_align, *dest, requested_len);
    if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "dest = %014p\n", *dest);
    *dest = memmove(*dest-PT_LOAD_L.p_align, *dest, PT_LOAD_F.p_memsz);
    if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "dest = %014p\n", *dest);
    return requested_len;
}

void map() {
    if (library[library_index].is_mapped == 0) {
        library[library_index]._elf_header = (Elf64_Ehdr *) library[library_index].array;
        library[library_index]._elf_program_header = (Elf64_Phdr *)((unsigned long)library[library_index]._elf_header + library[library_index]._elf_header->e_phoff);

/*
the very first thing we do is obtain the base address

Base Address
The virtual addresses in the program headers might not represent the actual virtual addresses
of the program's memory image. Executable files typically contain absolute code. To let the
process execute correctly, the segments must reside at the virtual addresses used to build the
executable file. On the other hand, shared object segments typically contain
position-independent code. This lets a segment's virtual address change from one process to
another, without invalidating execution behavior. Though the system chooses virtual addresses
for individual processes, it maintains the segments’ relative positions. Because
position-independent code uses relative addressing between segments, the difference between
virtual addresses in memory must match the difference between virtual addresses in the file.

The difference between the virtual address of any segment in memory and the corresponding
virtual address in the file is thus a single constant value for any one executable or shared object
in a given process. This difference is the base address. One use of the base address is to relocate
the memory image of the program during dynamic linking.

An executable or shared object file's base address is calculated during execution from three
values: the virtual memory load address, the maximum page size, and the lowest virtual address
of a program's loadable segment. To compute the base address, one determines the memory
address associated with the lowest p_vaddr value for a PT_LOAD segment. This address is
truncated to the nearest multiple of the maximum page size. The corresponding p_vaddr value
itself is also truncated to the nearest multiple of the maximum page size. The base address is
the difference between the truncated memory address and the truncated p_vaddr value.
*/

        // aquire the first and last PT_LOAD'S
        int PT_LOADS=0;
        for (int i = 0; i < library[library_index]._elf_header->e_phnum; ++i) {
            switch(library[library_index]._elf_program_header[i].p_type)
            {
                case PT_LOAD:
//                         if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "i = %d\n", i);
//                         if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "PT_LOADS = %d\n", PT_LOADS);
                    if (!PT_LOADS)  {
//                             if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "saving first load\n");
                        library[library_index].First_Load_Header_index = i;
                    }
                    if (PT_LOADS) {
//                             if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "saving last load\n");
                        library[library_index].Last_Load_Header_index = i;
                    }
                    PT_LOADS=PT_LOADS+1;
                    break;
            }
        }
        size_t span = library[library_index]._elf_program_header[library[library_index].Last_Load_Header_index].p_vaddr + library[library_index]._elf_program_header[library[library_index].Last_Load_Header_index].p_memsz - library[library_index]._elf_program_header[library[library_index].First_Load_Header_index].p_vaddr;


        read_fast_verifyb(library[library_index].array, library[library_index].len, &library[library_index].mapping_start, span, library[library_index]._elf_program_header[library[library_index].First_Load_Header_index], library[library_index]._elf_program_header[library[library_index].Last_Load_Header_index]);

        fprintf(stderr, "library[library_index]._elf_program_header[library[library_index].Last_Load_Header_index].p_vaddr = %014p\n", library[library_index]._elf_program_header[library[library_index].Last_Load_Header_index].p_vaddr);

        // aquire the lowest PT_LOAD'S
        Elf64_Addr lowest_p_vaddr = 0;
        int lowest_idx = -1;
        for (int i = 0; i < library[library_index]._elf_header->e_phnum; ++i) {
            switch(library[library_index]._elf_program_header[i].p_type)
            {
                case PT_LOAD:
                    if (!lowest_p_vaddr) {
                        lowest_p_vaddr = library[library_index]._elf_program_header[i].p_vaddr;
                        lowest_idx = i;
                    }
                    if (lowest_p_vaddr < library[library_index]._elf_program_header[i].p_memsz) {
                        lowest_p_vaddr = library[library_index]._elf_program_header[i].p_vaddr;
                        lowest_idx = i;
                    }
                    break;
            }
        }
        size_t pagesize = 0x1000;
        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "library[library_index]._elf_program_header[lowest_idx].p_paddr = %014p\nlibrary[library_index]._elf_program_header[lowest_idx].p_vaddr = %014p\n",library[library_index]._elf_program_header[lowest_idx].p_paddr, library[library_index]._elf_program_header[lowest_idx].p_vaddr);
        Elf64_Addr truncated_physical_address = round_down(library[library_index]._elf_program_header[lowest_idx].p_paddr, pagesize);
        Elf64_Addr truncated_virtual_address = round_down(library[library_index]._elf_program_header[lowest_idx].p_vaddr, pagesize);
        library[library_index].base_address = truncated_physical_address - truncated_virtual_address;
//      library[library_index].base_address = library[library_index].mapping_start;

        library[library_index].align = round_nearest(library[library_index]._elf_program_header[library[library_index].Last_Load_Header_index].p_vaddr, pagesize);
//      library[library_index].base_address = library[library_index].mapping_start - library[library_index].align;
        library[library_index].mapping_end = library[library_index].mapping_start+span;

        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "base address range = %014p - %014p\nmapping = %014p\nbase address = %014p\n", library[library_index].mapping_start, library[library_index].mapping_end, library[library_index].mapping_start, library[library_index].base_address);

//      abort_();
        // base address aquired, map all PT_LOAD segments adjusting by base address then continue with the rest
        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\n\n\nfind %014p, %014p, (int) 1239\n\n\n\n", library[library_index].mapping_start, library[library_index].mapping_end);

        if (library[library_index].mapping_start == 0x00000000) abort_();
        int PT_LOADS_CURRENT = 0;
        for (int i = 0; i < library[library_index]._elf_header->e_phnum; ++i) {
            switch(library[library_index]._elf_program_header[i].p_type)
            {
                case PT_LOAD:
                    PT_LOADS_CURRENT = PT_LOADS_CURRENT + 1;
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "mapping PT_LOAD number %d\n", PT_LOADS_CURRENT);
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\t\tp_flags:  %014p\n", library[library_index]._elf_program_header[i].p_flags);
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\t\tp_offset: %014p\n", library[library_index]._elf_program_header[i].p_offset);
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\t\tp_vaddr:  %014p\n", library[library_index]._elf_program_header[i].p_vaddr);
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\t\tp_paddr:  %014p\n", library[library_index]._elf_program_header[i].p_paddr);
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\t\tp_filesz: %014p\n", library[library_index]._elf_program_header[i].p_filesz);
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\t\tp_memsz:  %014p\n", library[library_index]._elf_program_header[i].p_memsz);
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\t\tp_align:  %014p\n\n", library[library_index]._elf_program_header[i].p_align);

                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "\tp_flags: %014p", library[library_index]._elf_program_header[i].p_flags);
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, " p_offset: %014p", library[library_index]._elf_program_header[i].p_offset);
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, " p_vaddr: %014p", library[library_index]._elf_program_header[i].p_vaddr);
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, " p_paddr: %014p", library[library_index]._elf_program_header[i].p_paddr);
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, " p_filesz: %014p", library[library_index]._elf_program_header[i].p_filesz);
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, " p_memsz: %014p", library[library_index]._elf_program_header[i].p_memsz);
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, " p_align: %014p\n\n\n", library[library_index]._elf_program_header[i].p_align);

                    if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "mprotect(%014p+round_down(%014p, %014p), %014p, ", library[library_index].mapping_start, library[library_index]._elf_program_header[i].p_vaddr, library[library_index]._elf_program_header[i].p_align, library[library_index]._elf_program_header[i].p_memsz);
                    prot_from_phdr(library[library_index]._elf_program_header[i].p_flags);
                    if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, ");\n");
                    errno = 0;
                    int check_mprotect_success = mprotect(library[library_index].mapping_start+round_down(library[library_index]._elf_program_header[i].p_vaddr, library[library_index]._elf_program_header[i].p_align), round_up(library[library_index]._elf_program_header[i].p_memsz, library[library_index]._elf_program_header[i].p_align), library[library_index]._elf_program_header[i].p_flags);
                    if (errno == 0)
                    {
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "mprotect on %014p succeded with size: %014p\n", library[library_index].mapping_start+round_down(library[library_index]._elf_program_header[i].p_vaddr, library[library_index]._elf_program_header[i].p_align), round_up(library[library_index]._elf_program_header[i].p_memsz, library[library_index]._elf_program_header[i].p_align));
                        print_maps();
                    }
                    else
                    {
                        if (bytecmpq(global_quiet, "no") == 0) fprintf(stderr, "mprotect failed with: %s (errno: %d, check_mprotect_success = %d)\n", strerror(errno), errno, check_mprotect_success);
                        print_maps();
                        abort_();
                    }
                    break;
            }
        }
        library[library_index].is_mapped = 1;
    }
}

int
init_(const char * filename) {
    init(filename);
    if (library[library_index].init__ == 1) return 0;
    library[library_index]._elf_header = (Elf64_Ehdr *) library[library_index].array;
    read_section_header_table_(library[library_index].array, library[library_index]._elf_header, &library[library_index]._elf_symbol_table);
    obtain_rela_plt_size(library[library_index].array, library[library_index]._elf_header, library[library_index]._elf_symbol_table);
    if(!strncmp((char*)library[library_index]._elf_header->e_ident, "\177ELF", 4)) {
        map();
        ...
    }
    ...
}

void *
dlopen_(const char * cc)
{
    if (library[library_index].init_lock == 1) {
        if (bytecmpq(ldd_quiet, "no") == 0) fprintf(stderr, "dlopen: LOCKED\n");
        return "-1";
    };
    if ( if_valid(cc) == -1) {
        fprintf(stderr, "\"%s\" not found\n", cc);
        errno = 0;
        return "-1";
    }
    init_(cc);
    ...
}

void *
dlopen(const char * cc) {
    get_needed(cc);
    return dlopen_(cc);
}

这是库的结构[]

struct lib
{
    char * rootlib;
    char * rootusrlib;
    int init_lock;
    char * struct_init;
    char * library_name;
    char ** NEEDED;
    int NEEDED_COUNT;
    char library_first_character;
    char * library_len;
    char * library_symbol;
    Elf64_Ehdr * _elf_header;
    Elf64_Phdr * _elf_program_header;
    Elf64_Shdr * _elf_symbol_table;
    char *strtab;
    size_t len;
    char * array;
    char * current_lib;
    char * last_lib;
    int is_mapped;
    size_t align;
    Elf64_Addr mapping_start;
    Elf64_Addr base_address;
    Elf64_Addr mapping_end;
    int init__;
    int PT_DYNAMIC_;
    char * tmp99D;
    Elf64_Dyn * dynamic;
    int First_Load_Header_index;
    int Last_Load_Header_index;
    size_t RELA_PLT_SIZE;
    int _R_X86_64_NONE;
    int _R_X86_64_64;
    int _R_X86_64_PC32;
    int _R_X86_64_GOT32;
    int _R_X86_64_PLT32;
    int _R_X86_64_COPY;
    int _R_X86_64_GLOB_DAT;
    int _R_X86_64_JUMP_SLOT;
    int _R_X86_64_RELATIVE;
    int _R_X86_64_GOTPCREL;
    int _R_X86_64_32;
    int _R_X86_64_32S;
    int _R_X86_64_16;
    int _R_X86_64_PC16;
    int _R_X86_64_8;
    int _R_X86_64_PC8;
    int _R_X86_64_DTPMOD64;
    int _R_X86_64_DTPOFF64;
    int _R_X86_64_TPOFF64;
    int _R_X86_64_TLSGD;
    int _R_X86_64_TLSLD;
    int _R_X86_64_DTPOFF32;
    int _R_X86_64_GOTTPOFF;
    int _R_X86_64_TPOFF32;
    int _R_X86_64_PC64;
    int _R_X86_64_GOTOFF64;
    int _R_X86_64_GOTPC32;
    int _R_X86_64_GOT64;
    int _R_X86_64_GOTPCREL64;
    int _R_X86_64_GOTPC64;
    int _Deprecated1;
    int _R_X86_64_PLTOFF64;
    int _R_X86_64_SIZE32;
    int _R_X86_64_SIZE64;
    int _R_X86_64_GOTPC32_TLSDESC;
    int _R_X86_64_TLSDESC_CALL;
    int _R_X86_64_TLSDESC;
    int _R_X86_64_IRELATIVE;
    int _R_X86_64_RELATIVE64;
    int _Deprecated2;
    int _Deprecated3;
    int _R_X86_64_GOTPLT64;
    int _R_X86_64_GOTPCRELX;
    int _R_X86_64_REX_GOTPCRELX;
    int _R_X86_64_NUM;
    int _R_X86_64_UNKNOWN;
    Elf64_Addr * GOT;
    Elf64_Addr * GOT2;
    Elf64_Addr * PLT;
} library[512];
extern struct lib library[512];

因为我不能完全确定“截断到最接近的倍数”是什么意思,但是因为p_paddr和p_vaddr是相同的,所以当“基地址是截断的内存地址和截断的内存地址之间的差值”时,对每个参数进行的计算都将为0。 p_vaddr值。”除非我不正确地解释了这一点:

An executable or shared object file's base address is calculated during execution from three
values: the virtual memory load address, the maximum page size, and the lowest virtual address
of a program's loadable segment. To compute the base address, one determines the memory
address associated with the lowest p_vaddr value for a PT_LOAD segment. This address is
truncated to the nearest multiple of the maximum page size. The corresponding p_vaddr value
itself is also truncated to the nearest multiple of the maximum page size. The base address is
the difference between the truncated memory address and the truncated p_vaddr value.

0 个答案:

没有答案