我对在Mac OS X平台上的x86-64
程序集编程很感兴趣。我遇到了this page about creating a 248B Mach-O program,这导致我Apple's own Mach-O format reference。之后我想我会在Xcode中制作同样简单的C程序并检查生成的程序集。
这是代码:
int main(int argc, const char * argv[])
{
return 42;
}
但是生成的程序集是334行,包含(基于248B模型)过量内容的批次。
首先,为什么在C可执行文件的Release版本中包含了如此多的DWARF调试信息?其次,我注意到Mach-O头数据被包含4次(在不同的DWARF相关sections
中)。为什么这有必要?最后,Xcode程序集包括:
.private_extern _main
.globl _main
_main:
.cfi_startproc
但是在248B计划中,这些都无处可见 - 程序改为从_start
开始。如果根据定义所有程序都以main
开始?
完整的Xcode程序集:
# Assembly output for main.c
# Generated at 4:04:08 PM on Sunday, January 20, 2013
# Using Release configuration, x86_64 architecture for Tiny target of Tiny project
.section __TEXT,__text,regular,pure_instructions
.file 1 "/Users/####/Desktop/Tiny/Tiny/main.c"
.section __DWARF,__debug_info,regular,debug
Lsection_info:
.section __DWARF,__debug_abbrev,regular,debug
Lsection_abbrev:
.section __DWARF,__debug_aranges,regular,debug
.section __DWARF,__debug_macinfo,regular,debug
.section __DWARF,__debug_line,regular,debug
Lsection_line:
.section __DWARF,__debug_loc,regular,debug
.section __DWARF,__debug_pubtypes,regular,debug
.section __DWARF,__debug_str,regular,debug
Lsection_str:
.section __DWARF,__debug_ranges,regular,debug
Ldebug_range:
.section __DWARF,__debug_loc,regular,debug
Lsection_debug_loc:
.section __TEXT,__text,regular,pure_instructions
Ltext_begin:
.section __DATA,__data
.section __TEXT,__text,regular,pure_instructions
.private_extern _main
.globl _main
_main: ## @main
.cfi_startproc
Lfunc_begin0:
.loc 1 12 0 ## /Users/####/Desktop/Tiny/Tiny/main.c:12:0
## BB#0:
pushq %rbp
Ltmp2:
.cfi_def_cfa_offset 16
Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp4:
.cfi_def_cfa_register %rbp
##DEBUG_VALUE: main:argc <- EDI+0
##DEBUG_VALUE: main:argv <- RSI+0
movl $42, %eax
.loc 1 15 5 prologue_end ## /Users/####/Desktop/Tiny/Tiny/main.c:15:5
Ltmp5:
popq %rbp
ret
Ltmp6:
Lfunc_end0:
.cfi_endproc
Ltext_end:
.section __DATA,__data
Ldata_end:
.section __TEXT,__text,regular,pure_instructions
Lsection_end1:
.section __DWARF,__debug_info,regular,debug
Linfo_begin1:
.long 127 ## Length of Compilation Unit Info
.short 2 ## DWARF version number
Lset0 = Labbrev_begin-Lsection_abbrev ## Offset Into Abbrev. Section
.long Lset0
.byte 8 ## Address Size (in bytes)
.byte 1 ## Abbrev [1] 0xb:0x78 DW_TAG_compile_unit
Lset1 = Lstring0-Lsection_str ## DW_AT_producer
.long Lset1
.short 12 ## DW_AT_language
Lset2 = Lstring1-Lsection_str ## DW_AT_name
.long Lset2
.quad 0 ## DW_AT_entry_pc
.long 0 ## DW_AT_stmt_list
Lset3 = Lstring2-Lsection_str ## DW_AT_comp_dir
.long Lset3
.byte 1 ## DW_AT_APPLE_optimized
.byte 2 ## Abbrev [2] 0x27:0x3e DW_TAG_subprogram
Lset4 = Lstring3-Lsection_str ## DW_AT_name
.long Lset4
.byte 1 ## DW_AT_decl_file
.byte 11 ## DW_AT_decl_line
.byte 1 ## DW_AT_prototyped
.long 101 ## DW_AT_type
.byte 1 ## DW_AT_external
.quad Lfunc_begin0 ## DW_AT_low_pc
.quad Lfunc_end0 ## DW_AT_high_pc
.byte 1 ## DW_AT_frame_base
.byte 86
.byte 3 ## Abbrev [3] 0x46:0xf DW_TAG_formal_parameter
Lset5 = Lstring5-Lsection_str ## DW_AT_name
.long Lset5
.byte 1 ## DW_AT_decl_file
.byte 11 ## DW_AT_decl_line
.long 101 ## DW_AT_type
Lset6 = Ldebug_loc0-Lsection_debug_loc ## DW_AT_location
.long Lset6
.byte 3 ## Abbrev [3] 0x55:0xf DW_TAG_formal_parameter
Lset7 = Lstring6-Lsection_str ## DW_AT_name
.long Lset7
.byte 1 ## DW_AT_decl_file
.byte 11 ## DW_AT_decl_line
.long 125 ## DW_AT_type
Lset8 = Ldebug_loc2-Lsection_debug_loc ## DW_AT_location
.long Lset8
.byte 0 ## End Of Children Mark
.byte 4 ## Abbrev [4] 0x65:0x7 DW_TAG_base_type
Lset9 = Lstring4-Lsection_str ## DW_AT_name
.long Lset9
.byte 5 ## DW_AT_encoding
.byte 4 ## DW_AT_byte_size
.byte 4 ## Abbrev [4] 0x6c:0x7 DW_TAG_base_type
Lset10 = Lstring7-Lsection_str ## DW_AT_name
.long Lset10
.byte 6 ## DW_AT_encoding
.byte 1 ## DW_AT_byte_size
.byte 5 ## Abbrev [5] 0x73:0x5 DW_TAG_const_type
.long 108 ## DW_AT_type
.byte 6 ## Abbrev [6] 0x78:0x5 DW_TAG_pointer_type
.long 115 ## DW_AT_type
.byte 6 ## Abbrev [6] 0x7d:0x5 DW_TAG_pointer_type
.long 120 ## DW_AT_type
.byte 0 ## End Of Children Mark
Linfo_end1:
.section __DWARF,__debug_abbrev,regular,debug
Labbrev_begin:
.byte 1 ## Abbreviation Code
.byte 17 ## DW_TAG_compile_unit
.byte 1 ## DW_CHILDREN_yes
.byte 37 ## DW_AT_producer
.byte 14 ## DW_FORM_strp
.byte 19 ## DW_AT_language
.byte 5 ## DW_FORM_data2
.byte 3 ## DW_AT_name
.byte 14 ## DW_FORM_strp
.byte 82 ## DW_AT_entry_pc
.byte 1 ## DW_FORM_addr
.byte 16 ## DW_AT_stmt_list
.byte 6 ## DW_FORM_data4
.byte 27 ## DW_AT_comp_dir
.byte 14 ## DW_FORM_strp
.ascii "\341\177" ## DW_AT_APPLE_optimized
.byte 12 ## DW_FORM_flag
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 2 ## Abbreviation Code
.byte 46 ## DW_TAG_subprogram
.byte 1 ## DW_CHILDREN_yes
.byte 3 ## DW_AT_name
.byte 14 ## DW_FORM_strp
.byte 58 ## DW_AT_decl_file
.byte 11 ## DW_FORM_data1
.byte 59 ## DW_AT_decl_line
.byte 11 ## DW_FORM_data1
.byte 39 ## DW_AT_prototyped
.byte 12 ## DW_FORM_flag
.byte 73 ## DW_AT_type
.byte 19 ## DW_FORM_ref4
.byte 63 ## DW_AT_external
.byte 12 ## DW_FORM_flag
.byte 17 ## DW_AT_low_pc
.byte 1 ## DW_FORM_addr
.byte 18 ## DW_AT_high_pc
.byte 1 ## DW_FORM_addr
.byte 64 ## DW_AT_frame_base
.byte 10 ## DW_FORM_block1
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 3 ## Abbreviation Code
.byte 5 ## DW_TAG_formal_parameter
.byte 0 ## DW_CHILDREN_no
.byte 3 ## DW_AT_name
.byte 14 ## DW_FORM_strp
.byte 58 ## DW_AT_decl_file
.byte 11 ## DW_FORM_data1
.byte 59 ## DW_AT_decl_line
.byte 11 ## DW_FORM_data1
.byte 73 ## DW_AT_type
.byte 19 ## DW_FORM_ref4
.byte 2 ## DW_AT_location
.byte 6 ## DW_FORM_data4
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 4 ## Abbreviation Code
.byte 36 ## DW_TAG_base_type
.byte 0 ## DW_CHILDREN_no
.byte 3 ## DW_AT_name
.byte 14 ## DW_FORM_strp
.byte 62 ## DW_AT_encoding
.byte 11 ## DW_FORM_data1
.byte 11 ## DW_AT_byte_size
.byte 11 ## DW_FORM_data1
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 5 ## Abbreviation Code
.byte 38 ## DW_TAG_const_type
.byte 0 ## DW_CHILDREN_no
.byte 73 ## DW_AT_type
.byte 19 ## DW_FORM_ref4
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 6 ## Abbreviation Code
.byte 15 ## DW_TAG_pointer_type
.byte 0 ## DW_CHILDREN_no
.byte 73 ## DW_AT_type
.byte 19 ## DW_FORM_ref4
.byte 0 ## EOM(1)
.byte 0 ## EOM(2)
.byte 0 ## EOM(3)
Labbrev_end:
.section __DWARF,__apple_names,regular,debug
Lnames_begin:
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 1 ## Header Bucket Count
.long 1 ## Header Hash Count
.long 12 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 1 ## HeaderData Atom Count
.short 1 ## eAtomTypeDIEOffset
.short 6 ## DW_FORM_data4
.long 0 ## Bucket 0
.long 2090499946 ## Hash in Bucket 0
.long LNames0-Lnames_begin ## Offset in Bucket 0
LNames0:
Lset11 = Lstring3-Lsection_str ## main
.long Lset11
.long 1 ## Num DIEs
.long 39
.long 0
.section __DWARF,__apple_objc,regular,debug
Lobjc_begin:
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 1 ## Header Bucket Count
.long 0 ## Header Hash Count
.long 12 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 1 ## HeaderData Atom Count
.short 1 ## eAtomTypeDIEOffset
.short 6 ## DW_FORM_data4
.long -1 ## Bucket 0
.section __DWARF,__apple_namespac,regular,debug
Lnamespac_begin:
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 1 ## Header Bucket Count
.long 0 ## Header Hash Count
.long 12 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 1 ## HeaderData Atom Count
.short 1 ## eAtomTypeDIEOffset
.short 6 ## DW_FORM_data4
.long -1 ## Bucket 0
.section __DWARF,__apple_types,regular,debug
Ltypes_begin:
.long 1212240712 ## Header Magic
.short 1 ## Header Version
.short 0 ## Header Hash Function
.long 2 ## Header Bucket Count
.long 2 ## Header Hash Count
.long 20 ## Header Data Length
.long 0 ## HeaderData Die Offset Base
.long 3 ## HeaderData Atom Count
.short 1 ## eAtomTypeDIEOffset
.short 6 ## DW_FORM_data4
.short 3 ## eAtomTypeTag
.short 5 ## DW_FORM_data2
.short 5 ## eAtomTypeTypeFlags
.short 11 ## DW_FORM_data1
.long 0 ## Bucket 0
.long 1 ## Bucket 1
.long 193495088 ## Hash in Bucket 0
.long 2090147939 ## Hash in Bucket 1
.long Ltypes0-Ltypes_begin ## Offset in Bucket 0
.long Ltypes1-Ltypes_begin ## Offset in Bucket 1
Ltypes0:
Lset12 = Lstring4-Lsection_str ## int
.long Lset12
.long 1 ## Num DIEs
.long 101
.short 36
.byte 0
.long 0
Ltypes1:
Lset13 = Lstring7-Lsection_str ## char
.long Lset13
.long 1 ## Num DIEs
.long 108
.short 36
.byte 0
.long 0
.section __DWARF,__debug_pubtypes,regular,debug
Lset14 = Lpubtypes_end1-Lpubtypes_begin1 ## Length of Public Types Info
.long Lset14
Lpubtypes_begin1:
.short 2 ## DWARF Version
Lset15 = Linfo_begin1-Lsection_info ## Offset of Compilation Unit Info
.long Lset15
Lset16 = Linfo_end1-Linfo_begin1 ## Compilation Unit Length
.long Lset16
.long 0 ## End Mark
Lpubtypes_end1:
.section __DWARF,__debug_loc,regular,debug
Ldebug_loc0:
.quad Lfunc_begin0
.quad Ltmp6
Lset17 = Ltmp8-Ltmp7 ## Loc expr size
.short Lset17
Ltmp7:
.byte 85 ## DW_OP_reg5
Ltmp8:
.quad 0
.quad 0
Ldebug_loc2:
.quad Lfunc_begin0
.quad Ltmp6
Lset18 = Ltmp10-Ltmp9 ## Loc expr size
.short Lset18
Ltmp9:
.byte 84 ## DW_OP_reg4
Ltmp10:
.quad 0
.quad 0
Ldebug_loc4:
.section __DWARF,__debug_aranges,regular,debug
.section __DWARF,__debug_ranges,regular,debug
.section __DWARF,__debug_macinfo,regular,debug
.section __DWARF,__debug_inlined,regular,debug
Lset19 = Ldebug_inlined_end1-Ldebug_inlined_begin1 ## Length of Debug Inlined Information Entry
.long Lset19
Ldebug_inlined_begin1:
.short 2 ## Dwarf Version
.byte 8 ## Address Size (in bytes)
Ldebug_inlined_end1:
.section __DWARF,__debug_str,regular,debug
Lstring0:
.asciz "Apple clang version 4.1 (tags/Apple/clang-421.11.66) (based on LLVM 3.1svn)"
Lstring1:
.asciz "/Users/####/Desktop/Tiny/Tiny/main.c"
Lstring2:
.asciz "/Users/####/Desktop/Tiny"
Lstring3:
.asciz "main"
Lstring4:
.asciz "int"
Lstring5:
.asciz "argc"
Lstring6:
.asciz "argv"
Lstring7:
.asciz "char"
.subsections_via_symbols
答案 0 :(得分:13)
首先,为什么在C可执行文件的发布版本中包含了如此多的DWARF调试信息?
能够调试优化代码非常有用。只有在优化版本中才能看到错误的情况并不罕见。如果您正在编写程序集,那么您不太可能关心DWARF信息,因此我建议您在不使用-g
参数的情况下构建比较代码。
其次,我注意到Mach-O头数据被包含4次(在不同的DWARF相关部分中)。为什么这有必要?
这些不是你所看到的Mach-O标题。它们是DWARF accelerator tables的标头,是DWARF的LLVM扩展,它优化了是否在给定的编译单元中定义符号的测试。
但在248B计划中,这些都无处可见 - 程序改为从_start开始。如果根据定义所有程序都以main开头,那怎么可能呢?
历史上,在OS X上,所有程序都从start
开始。但是,此符号通常来自系统库,而不是由程序本身定义。 start
的系统实现将执行一些初始化,然后跳转到您的程序“真实”入口点。
Mach-O二进制文件的入口点由LC_UNIXTHREAD
或LC_MAIN
加载命令定义。当LC_UNIXTHREAD
(OS X之前的10.8版本的约定)与常规C或C ++程序一起使用时,链接器使用start
作为入口点。此符号通常来自/usr/lib/crt1.o
,其地址写入LC_UNIXTHREAD
load命令的指令指针字段。您链接到的248B二进制文件包含LC_UNIXTHREAD
命令,eip
设置为0x000010e8。这是符号_start
的地址。由于这个小程序是一个静态可执行程序,并且二进制文件是直接生成的,它可以将它希望的任何地址写入加载命令的指令指针字段。
如果您正在构建针对OS X 10.8+的可执行文件,则链接器将生成LC_MAIN
加载命令而不是LC_UNIXTHREAD
。内核知道使用LC_MAIN
命令的二进制文件应该通过加载动态链接器并跳转到其入口点来执行。动态链接器dyld初始化自身,然后跳转到LC_MAIN
命令中指定的地址。在这个勇敢的新世界中,根本没有使用名为start
的符号。