elf文件是静态链接的
目前objdump的输出类似于:
Disassembly of section: .init:
xxxxxx
Disassembly of section: .plt:
xxxxxx
Disassembly of section: .text:
xxxxxx
基本上我想要实现的是
“elf-file - (通过objdump反汇编) - >汇编文件 - (重新编译) - > 相同的功能”
我不需要重新编译的二进制文件具有与原始二进制文件相同的二进制内容,只有相同的功能就足够了。
经过快速搜索,得到的答案基本上都是“做不到”:他们认为反汇编输出丢失了某些内容(例如符号信息等),但我认为既然是静态链接,就可以避开这个问题……
谢谢!
答案 0 :(得分:5)
objdump -S -d elf-file 通常不够,因为它的输出缺少 .data 部分。
但看起来 objdump -S -D elf-file 就足够了。
为了验证这一点,我编写了一个使用 extern printf 的小型 x86-64 汇编文件,用 YASM 汇编(不带调试符号),并用 GCC 链接。
[bits 64]

; yasm -f elf64 -m amd64 1st_generation.asm -o 1st_generation.o; gcc -o 1st_generation 1st_generation.o

section .text
global main
extern printf

; main: calls printf(msg, num) and then terminates through a raw syscall.
main:
    push rbp
    mov rbp,rsp
    lea rdi,[msg]       ; first printf argument: the format string
    mov rsi,[num]       ; second printf argument
                        ; NOTE(review): this is a 64-bit load from a 32-bit `dd`;
                        ; it only works because %d consumes the low 32 bits.
    xor eax,eax         ; variadic call: al = number of vector-register arguments (none)
    call printf
    mov eax,60          ; syscall number 60 (exit on Linux x86-64)
    xor ebx,ebx         ; NOTE(review): the x86-64 syscall ABI reads the exit status
                        ; from rdi, not ebx - confirm the intended exit code.
    syscall

section .data
msg db 'abcdef = %d', 0xa, 0
num dd 1337
testmsg1: db "test 01", 0x0a, 0
然后我用 objdump -S -D -M intel elf-file >objdump_output.txt 对它进行反汇编。-M intel 选项以 Intel 语法生成反汇编。AT&T 语法也可以,但我更喜欢 Intel 语法的清晰度。
然后我编写了一个小的 gawk 程序 objdump_to_asm,把 objdump -S -D -M intel elf-file >objdump_output.txt 生成的反汇编转换为适合 YASM 的格式。该程序假设代码为 x86-64,并以 main 作为入口点;可以很容易地修改以适应其他环境(x86 很简单,其他架构可能需要更多工作)。用法:./objdump_to_asm objdump_output.txt。有趣的是,第一代可执行文件的大小为6598字节,而第二代可执行文件的大小仅为6496字节。第3代汇编代码与第2代汇编代码相同。
以下是代码:
#!/usr/bin/awk -f
# One-time setup: builds the lookup tables used by the per-line rule, then
# consumes the first two input lines and emits "[bits 64]" when the second
# one reports an elf64-x86-64 file.
# Each table is a 1-based array accompanied by a number_of_* element count;
# split() fills indices 1..n in the order the names are listed.
BEGIN {
    disassembly_of_section_string = "Disassembly of section ";

    # Sections whose disassembly is recognised but produces no output.
    discard_names = ".interp .note.ABI-tag .note.gnu.build-id .dynsym .dynstr";
    discard_names = discard_names " .hash .gnu.hash .gnu.version .gnu.version_r";
    discard_names = discard_names " .rela.dyn .rela.init .eh_frame .dynamic";
    discard_names = discard_names " .got .got.plt .jcr .init_array .comment";
    discard_names = discard_names " .note.gnu.gold-version";
    number_of_sections_to_discard = split(discard_names, sections_to_discard, " ");

    # Sections that are converted.
    number_of_sections_to_handle = split(".plt .text .data .bss", sections_to_handle, " ");

    # Labels inside .text that are not converted (note that this includes _start).
    discard_labels = "<call_gmon_start>: <deregister_tm_clones>: <register_tm_clones>:";
    discard_labels = discard_labels " <__do_global_dtors_aux>: <frame_dummy>:";
    discard_labels = discard_labels " <__libc_csu_fini>: <__libc_csu_init>: <_start>:";
    number_of_blocks_to_discard_in_text = split(discard_labels, blocks_to_discard_in_text, " ");

    # Labels whose contents are converted, per section.
    number_of_blocks_to_handle_in_text = split("main", blocks_to_handle_in_text, " ");
    number_of_blocks_to_handle_in_data = split("__dso_handle", blocks_to_handle_in_data, " ");

    # External symbols that get an "extern" declaration.
    number_of_externs_to_handle = split("printf", externs_to_handle, " ");

    # 1-based character columns in an objdump line: where the hex bytes start
    # and where the disassembled instruction text starts.
    hexdump_start_byte = 11;
    disassembly_start_byte = 33;

    current_section = "";

    # Advance to the second input line, which is searched for the file format.
    getline;
    getline;
    file_format_index = match($0, "file format elf64-x86-64")
    if (file_format_index > 0)
        print "[bits 64]";
}
# Per-line rule: tracks the section currently being disassembled and converts
# the configured blocks of .plt, .text and .data into YASM source on stdout.
#
# Reads these globals set up in BEGIN: disassembly_of_section_string,
# sections_to_handle, sections_to_discard, externs_to_handle,
# blocks_to_handle_in_text, blocks_to_handle_in_data (with their number_of_*
# counts), hexdump_start_byte and disassembly_start_byte.
# Requires gawk (gensub).
{
    # Section tracking. A header line is consumed together with the line that
    # follows it. Sections in neither table leave current_section unchanged.
    matched_section = section_named_in_header($0, sections_to_handle, number_of_sections_to_handle);
    if (matched_section != "")
    {
        current_section = matched_section;
        getline;
    }
    matched_section = section_named_in_header($0, sections_to_discard, number_of_sections_to_discard);
    if (matched_section != "")
    {
        current_section = matched_section;
        getline;
    }

    # .plt: every configured extern that has a PLT stub gets an "extern" line.
    # Exact comparison so that e.g. ".got.plt" is not mistaken for ".plt".
    if (current_section == ".plt")
    {
        for (i = 1; i <= number_of_externs_to_handle; i++)
        {
            if (index($0, "<" externs_to_handle[i] "@plt>:") > 0)
            {
                print "extern " externs_to_handle[i];
                getline;
                break;
            }
        }
    }

    # .text: copy the instructions of each configured label up to the next
    # blank line (or the end of input).
    if (current_section == ".text")
    {
        for (i = 1; i <= number_of_blocks_to_handle_in_text; i++)
        {
            if (index($0, "<" blocks_to_handle_in_text[i] ">:") > 0)
            {
                print "section .text";
                print "global " blocks_to_handle_in_text[i];
                print blocks_to_handle_in_text[i] ":";
                # Checking getline's result keeps the loop from spinning
                # forever when the input ends without a blank line.
                while ((getline) > 0 && length($0) > 0)
                {
                    # Drop the address and hex-byte columns.
                    instruction = substr($0, disassembly_start_byte);
                    # "QWORD PTR [x]" -> "QWORD [x]".
                    instruction = gensub(/PTR /, "", "g", instruction);
                    # "ds:0x401965" -> "[0x401965]".
                    instruction = gensub(/(ds:)([a-z0-9]*)/, "[\\2]", "g", instruction);
                    instruction = plt_reference_to_symbol(instruction);
                    # Lines that start with a data32 prefix are not emitted.
                    if (match(instruction, "data32") != 1)
                    {
                        print instruction;
                    }
                }
                break;
            }
        }
    }

    # .data: emit the hex-byte column of every line after the configured
    # label as "db" directives, up to the next blank line (or end of input).
    if (current_section == ".data")
    {
        for (i = 1; i <= number_of_blocks_to_handle_in_data; i++)
        {
            if (index($0, "<" blocks_to_handle_in_data[i] ">:") > 0)
            {
                print "section .data";
                # The line directly after the label is consumed without being
                # converted (behaviour kept from the original script).
                # NOTE(review): presumably objdump's "..." placeholder for the
                # label's zero bytes - confirm against real output.
                getline;
                while (length($0) > 0)
                {
                    if ((getline) <= 0)
                    {
                        break; # end of input: stop instead of looping forever.
                    }
                    hexdump_only = substr($0, hexdump_start_byte, (disassembly_start_byte - hexdump_start_byte));
                    # "61 62" -> "0x61 0x62".
                    hexdump_only = gensub(/([[:alnum:]]+)/, "0x\\1", "g", hexdump_only);
                    # Insert commas. Matches cannot overlap, so one pass only
                    # joins every other pair; the second pass joins the rest.
                    hexdump_only = gensub(/(0x[[:alnum:]]+)( )(0x[[:alnum:]]+)/, "\\1, \\3", "g", hexdump_only);
                    hexdump_only = gensub(/(0x[[:alnum:]]+)( )(0x[[:alnum:]]+)/, "\\1, \\3", "g", hexdump_only);
                    if (match (hexdump_only, "0x") > 0)
                    {
                        print "db " hexdump_only;
                    }
                }
                break;
            }
        }
    }
}

# Returns the first entry of names[1..count] for which line contains the
# header "Disassembly of section <name>:", or "" when there is none.
# The comparison is literal and includes the trailing colon, so ".got" does
# not match the header of ".got.plt".
function section_named_in_header(line, names, count,    k)
{
    for (k = 1; k <= count; k++)
    {
        if (index(line, disassembly_of_section_string names[k] ":") > 0)
        {
            return names[k];
        }
    }
    return "";
}

# Rewrites a PLT call target such as "4003e0 <printf@plt>" to the bare symbol
# name ("printf") for the first configured extern found in text; returns text
# unchanged when no configured extern is referenced.
# The address is matched as hexadecimal, and the work is done in-process
# instead of piping every line through echo and sed.
function plt_reference_to_symbol(text,    k, symbol)
{
    for (k = 1; k <= number_of_externs_to_handle; k++)
    {
        symbol = externs_to_handle[k];
        if (index(text, "<" symbol "@plt>") > 0)
        {
            return gensub("[0-9a-f]+ <" symbol "@plt>", symbol, "g", text);
        }
    }
    return text;
}
执行 ./objdump_to_asm objdump_output.txt >2nd_generation.asm 会生成以下汇编文件。用 YASM 汇编、用 GCC 链接后,得到的可执行文件与原始文件并不相同:它的大小是6496字节,而原始可执行文件的大小为6568字节。
; 2nd_generation.asm as produced by objdump_to_asm (one statement per line).
[bits 64]
extern printf
section .text
global main
main:
push rbp
mov rbp,rsp
lea rdi,[0x401958]
mov rsi,QWORD [0x401965]
xor eax,eax
call printf
mov eax,0x3c
xor ebx,ebx
syscall
section .data
db 0x61
db 0x62
db 0x63, 0x64, 0x65, 0x66
db 0x20, 0x3d, 0x20, 0x25, 0x64, 0x0a
db 0x00, 0x39
db 0x05, 0x00, 0x00, 0x74, 0x65
db 0x73, 0x74
db 0x20, 0x30
db 0x31, 0x0a
db 0x00, 0x00
答案 1 :(得分:1)
至少需要某种“中间阶段”,把重定位信息(reloc-info)转换成汇编器能够接受的形式。就我所见(Linux),代码段先在内部完成链接,然后标签信息被重定位信息取代。
在反汇编中,这表现为跳转指令的目标就是该跳转指令自身,同时附带重定位信息,说明加载时应如何处理跳转目标地址。