我可以将“objdump -S -d elf-file”生成的所有节(section)组合成一个可以重新汇编的文件吗?

时间:2013-12-10 22:17:48

标签: assembly reverse-engineering elf disassembly objdump

elf文件是静态链接的

目前objdump的输出类似于:

Disassembly of section: .init:

xxxxxx

Disassembly of section: .plt:

xxxxxx

Disassembly of section: .text:

xxxxxx

基本上我想要实现的是

“elf-file -(通过objdump反汇编)-> 汇编文件 -(重新编译)-> 相同的功能”

我不需要重新编译的二进制文件具有与原始二进制文件相同的二进制内容,只有相同的功能就足够了。

经过快速搜索,答案基本上是没有,他们认为反汇编文件丢失了某些内容,如符号信息或其他内容,但我认为通过静态链接,我可以摆脱这个问题......

谢谢!

2 个答案:

答案 0 :(得分:5)

objdump -S -d elf-file通常不够,因为它缺少.data部分。

但似乎objdump -S -D elf-file就足够了。

为了尝试这个,我编写了一个使用extern printf的小型x86-64汇编文件,用YASM将其汇编(不带调试符号),并用GCC链接。

; 1st-generation test program (Intel syntax, YASM/NASM dialect).
; Prints "abcdef = 1337" through libc printf and then terminates with a raw
; Linux `syscall`.  Target ABI for the printf call: System V AMD64.
[bits 64]

; yasm -f elf64 -m amd64 1st_generation.asm -o 1st_generation.o; gcc -o 1st_generation 1st_generation.o

section .text
global main
extern printf

; main -- entry point called by the C runtime that gcc links in.
main:
    push    rbp                 ; frame pointer; also leaves rsp 16-byte aligned for the call below
    mov     rbp,rsp
    lea     rdi,[msg]           ; printf arg 1: address of the format string
    mov     rsi,[num]           ; printf arg 2.  NOTE(review): rsi is 64-bit, so this reads 8 bytes
                                ; although num is declared `dd` (4 bytes); the upper half comes from
                                ; the bytes that follow num.  Presumably harmless because %d only
                                ; consumes the low 32 bits -- confirm.
    xor     eax,eax             ; al = 0: no vector registers used by this variadic call
    call    printf
    mov     eax,60              ; syscall number 60 (exit on x86-64 Linux)
    xor     ebx,ebx             ; NOTE(review): `syscall` takes its first argument in rdi, not ebx,
                                ; so this does not set the exit status -- looks like a leftover from
                                ; the 32-bit int 0x80 convention; confirm intent.
    syscall                     ; NOTE(review): leaving via a raw syscall skips libc's exit path, so
                                ; buffered stdout may not be flushed when output is redirected -- verify.

section .data

msg db 'abcdef = %d', 0xa, 0    ; NUL-terminated format string ending in a newline
num dd 1337                     ; 32-bit value printed through %d

; Not referenced by the code above.
testmsg1:
db "test 01", 0x0a, 0

然后我用objdump -S -D -M intel elf-file >objdump_output.txt反汇编它。-M intel以Intel语法生成反汇编。AT&T语法也可以工作,但我更喜欢Intel语法的清晰度。

然后我编写了一个小的gawk程序objdump_to_asm,将objdump -S -D -M intel elf-file >objdump_output.txt生成的反汇编转换为适合YASM的格式。该程序假设代码为x86-64,并以main作为入口点。可以很容易地修改以适应其他环境(改为x86非常简单,其他架构可能需要更多工作)。用法:./objdump_to_asm objdump_output.txt。有趣的是,第一代可执行文件的大小为6598字节,而第二代可执行文件的大小仅为6496字节。第3代汇编代码与第2代汇编代码相同。

以下是代码:

#!/usr/bin/awk -f
# objdump_to_asm -- set-up phase.
#
# Fills the lookup tables consulted by the main rule (sections, symbol blocks
# and externs to keep or drop), records the column offsets used to slice an
# objdump line, and consumes the first two input lines to detect the file
# format.  Each table is a 1-based array with its element count stored in the
# matching number_of_* variable.
BEGIN {
    # Text that precedes the section name in a section header line.
    disassembly_of_section_string = "Disassembly of section ";

    # Sections whose contents are never emitted.
    n = 0;
    sections_to_discard[++n] = ".interp";
    sections_to_discard[++n] = ".note.ABI-tag";
    sections_to_discard[++n] = ".note.gnu.build-id";
    sections_to_discard[++n] = ".dynsym";
    sections_to_discard[++n] = ".dynstr";
    sections_to_discard[++n] = ".hash";
    sections_to_discard[++n] = ".gnu.hash";
    sections_to_discard[++n] = ".gnu.version";
    sections_to_discard[++n] = ".gnu.version_r";
    sections_to_discard[++n] = ".rela.dyn";
    sections_to_discard[++n] = ".rela.init";
    sections_to_discard[++n] = ".eh_frame";
    sections_to_discard[++n] = ".dynamic";
    sections_to_discard[++n] = ".got";
    sections_to_discard[++n] = ".got.plt";
    sections_to_discard[++n] = ".jcr";
    sections_to_discard[++n] = ".init_array";
    sections_to_discard[++n] = ".comment";
    sections_to_discard[++n] = ".note.gnu.gold-version";
    number_of_sections_to_discard = n;

    # Sections the main rule looks into.
    n = 0;
    sections_to_handle[++n] = ".plt";
    sections_to_handle[++n] = ".text";
    sections_to_handle[++n] = ".data";
    sections_to_handle[++n] = ".bss";
    number_of_sections_to_handle = n;

    # Symbol blocks in .text listed as not wanted in the output.
    n = 0;
    blocks_to_discard_in_text[++n] = "<call_gmon_start>:";
    blocks_to_discard_in_text[++n] = "<deregister_tm_clones>:";
    blocks_to_discard_in_text[++n] = "<register_tm_clones>:";
    blocks_to_discard_in_text[++n] = "<__do_global_dtors_aux>:";
    blocks_to_discard_in_text[++n] = "<frame_dummy>:";
    blocks_to_discard_in_text[++n] = "<__libc_csu_fini>:";
    blocks_to_discard_in_text[++n] = "<__libc_csu_init>:";
    blocks_to_discard_in_text[++n] = "<_start>:"; # !!!
    number_of_blocks_to_discard_in_text = n;

    # Symbol blocks in .text that are converted.
    n = 0;
    blocks_to_handle_in_text[++n] = "main";
    number_of_blocks_to_handle_in_text = n;

    # Symbol after which the .data bytes are converted.
    n = 0;
    blocks_to_handle_in_data[++n] = "__dso_handle";
    number_of_blocks_to_handle_in_data = n;

    # External functions reached through the PLT.
    n = 0;
    externs_to_handle[++n] = "printf";
    number_of_externs_to_handle = n;

    # 1-based columns of an objdump line: where the hex bytes start and where
    # the disassembled instruction text starts.
    hexdump_start_byte = 11;
    disassembly_start_byte = 33;

    current_section = "";

    # Advance to the second input line and test it for the ELF64 x86-64
    # file-format marker.
    getline;
    getline;

    file_format_index = match($0, "file format elf64-x86-64");
    if (file_format_index > 0)
        print "[bits 64]";
}
# Main rule: runs for every input line that BEGIN did not consume.
#
# Tracks the current section from objdump's section headers and then:
#   .plt  : prints "extern <name>" for every known "<name@plt>:" stub
#   .text : prints each wanted block as a label followed by its instructions
#   .data : after the wanted symbol, prints the raw bytes as "db" lines
# Requires gawk (gensub).  Uses the tables and column offsets set up in BEGIN.
{
    # A section header switches the current section; the line after the
    # header is then loaded, as before.
    matched_section = find_section_header(sections_to_handle, number_of_sections_to_handle);
    if (matched_section != "")
    {
        current_section = matched_section;
        getline;
    }

    matched_section = find_section_header(sections_to_discard, number_of_sections_to_discard);
    if (matched_section != "")
    {
        current_section = matched_section;
        getline;
    }

    # Section names are compared literally: used as a regex, ".plt" would
    # also match ".got.plt".
    if (current_section == ".plt")
    {
        for (i = 1; i <= number_of_externs_to_handle; i++)
        {
            if (index($0, "<" externs_to_handle[i] "@plt>:") > 0)
            {
                print "extern " externs_to_handle[i];
                getline;
                break;
            }
        }
    }

    if (current_section == ".text")
    {
        for (i = 1; i <= number_of_blocks_to_handle_in_text; i++)
        {
            if (index($0, "<" blocks_to_handle_in_text[i] ">:") == 0)
            {
                continue;
            }

            print "section .text";
            print "global " blocks_to_handle_in_text[i];
            print blocks_to_handle_in_text[i] ":";

            # The block ends at the first empty line.  The getline result is
            # checked so that input ending inside the block cannot loop forever.
            while ((getline) > 0 && length($0) > 0)
            {
                instruction = rewrite_instruction($0);

                # Lines starting with a bare "data32" prefix are dropped.
                if (index(instruction, "data32") != 1)
                {
                    print instruction;
                }
            }
            break;
        }
    }

    if (current_section == ".data")
    {
        for (i = 1; i <= number_of_blocks_to_handle_in_data; i++)
        {
            if (index($0, "<" blocks_to_handle_in_data[i] ">:") == 0)
            {
                continue;
            }

            print "section .data";
            getline;

            # As in the original, the line directly after the symbol label is
            # skipped and conversion starts with the line after it.
            while (length($0) > 0)
            {
                if ((getline) <= 0)
                {
                    break;      # end of input: stop instead of spinning
                }
                hexdump_only = hex_bytes_to_db_operands($0);
                if (index(hexdump_only, "0x") > 0)
                {
                    print "db " hexdump_only;
                }
            }
            break;
        }
    }
}

# Returns the entry of section_names[1..section_count] whose header
# ("Disassembly of section <name>:") occurs in the current line, or "" if
# none does.  The trailing colon keeps ".got" from matching ".got.plt".
function find_section_header(section_names, section_count,    k)
{
    for (k = 1; k <= section_count; k++)
    {
        if (index($0, disassembly_of_section_string section_names[k] ":") > 0)
        {
            return section_names[k];
        }
    }
    return "";
}

# Converts one objdump instruction line to YASM syntax: cuts off the address
# and hex-byte columns, removes "PTR ", rewrites "ds:addr" as "[addr]", and
# replaces "<hex address> <name@plt>" with the bare extern name.
function rewrite_instruction(raw_line,    text, k, extern_name)
{
    text = substr(raw_line, disassembly_start_byte);
    text = gensub(/PTR /, "", "g", text);
    text = gensub(/(ds:)([a-z0-9]*)/, "[\\2]", "g", text);

    for (k = 1; k <= number_of_externs_to_handle; k++)
    {
        extern_name = externs_to_handle[k];
        if (index(text, "<" extern_name "@plt>") > 0)
        {
            # Done in-process rather than through a shell pipeline; the
            # address is matched as hexadecimal so digits a-f are removed too.
            text = gensub("[0-9a-f]+ <" extern_name "@plt>", extern_name, "g", text);
            break;
        }
    }
    return text;
}

# Extracts the hex-byte column of an objdump line and turns "61 62 63" into
# "0x61, 0x62, 0x63".  Surrounding whitespace is kept unchanged.
function hex_bytes_to_db_operands(raw_line,    bytes)
{
    bytes = substr(raw_line, hexdump_start_byte, (disassembly_start_byte - hexdump_start_byte));
    bytes = gensub(/([[:alnum:]]+)/, "0x\\1", "g", bytes);
    # One pass joins non-overlapping pairs only, so a second pass joins the rest.
    bytes = gensub(/(0x[[:alnum:]]+)( )(0x[[:alnum:]]+)/, "\\1, \\3", "g", bytes);
    bytes = gensub(/(0x[[:alnum:]]+)( )(0x[[:alnum:]]+)/, "\\1, \\3", "g", bytes);
    return bytes;
}

执行./objdump_to_asm objdump_output.txt >2nd_generation.asm会生成以下汇编文件。用YASM汇编,用GCC链接。汇编并链接得到的可执行文件与原始文件并不相同:它的大小为6496字节,而原始可执行文件的大小为6568字节。

; 2nd-generation listing produced by objdump_to_asm.  The ';' comments are
; reader annotations and are not part of the script's output.
[bits 64]
extern printf
section .text
global main
main:
push   rbp
mov    rbp,rsp
lea    rdi,[0x401958]           ; absolute address where the label msg used to be

mov    rsi,QWORD [0x401965]     ; absolute address where the label num used to be (8-byte load)

xor    eax,eax
call   printf
mov    eax,0x3c                 ; 0x3c = 60
xor    ebx,ebx
syscall 

section .data
; Bytes of the original msg / num / testmsg1 data, grouped the way objdump
; happened to split them.
db 0x61                     ; 'a'
db 0x62                     ; 'b'
db 0x63, 0x64, 0x65, 0x66           ; "cdef"
db 0x20, 0x3d, 0x20, 0x25, 0x64, 0x0a       ; " = %d\n"
db 0x00, 0x39                   ; NUL ending msg, then first byte of num
db 0x05, 0x00, 0x00, 0x74, 0x65         ; rest of num (0x00000539 = 1337), then "te"
db 0x73, 0x74                   ; "st"
db 0x20, 0x30                   ; " 0"
db 0x31, 0x0a                   ; "1\n"
db 0x00, 0x00                   ; NUL ending testmsg1; second 0x00 is presumably padding -- confirm

答案 1 :(得分:1)

至少需要某种“中间阶段”,才能把重定位信息(reloc-info)转换成汇编器能够接受的形式。据我所见(在Linux上),代码节会先在内部完成链接,然后标签信息被重定位信息取代。

您可以把它们看作是跳转到跳转指令自身的跳转,外加一条重定位信息,说明在加载时应如何处理跳转的目标地址。