基本的OS X汇编和Mach-O格式

时间:2013-01-20 06:20:03

标签: macos assembly x86-64 mach-o dwarf

我对在Mac OS X平台上进行x86-64汇编编程很感兴趣。我偶然看到了一篇关于创建248B Mach-O程序的文章,并由此找到了Apple官方的Mach-O格式参考文档。之后,我想在Xcode中编写一个同样简单的C程序,并查看生成的汇编代码。

这是代码:

/* Minimal program: main ignores argc and argv and returns 42. */
int main(int argc, const char * argv[])
{
    return 42;
}

但是生成的汇编代码有334行,其中(以248B的示例为参照)包含大量多余的内容。

首先,为什么C可执行文件的Release版本中会包含这么多DWARF调试信息?其次,我注意到Mach-O头数据被包含了4次(位于不同的DWARF相关section中)。为什么有必要这样做?最后,Xcode生成的汇编中包含:

.private_extern _main
.globl  _main
_main:
    .cfi_startproc

但是在248B程序中,这些都无处可见——程序改为从_start开始。如果按照定义所有程序都从main开始,那怎么可能呢?

完整的Xcode汇编输出:

# Assembly output for main.c
# Generated at 4:04:08 PM on Sunday, January 20, 2013
# Using Release configuration, x86_64 architecture for Tiny target of Tiny project

    ## Section preamble: sections are entered up front so that a label can be
    ## placed at the start of each one (Lsection_info, Lsection_abbrev,
    ## Lsection_line, Lsection_str, Ldebug_range, Lsection_debug_loc,
    ## Ltext_begin) before any contents are emitted.  The later
    ## "LsetN = X-Lsection_*" assignments use these labels as the base for
    ## section-relative offsets.
    .section    __TEXT,__text,regular,pure_instructions
    ## File number 1; the ".loc 1 ..." directives in _main refer to it.
    .file   1 "/Users/####/Desktop/Tiny/Tiny/main.c"
    .section    __DWARF,__debug_info,regular,debug
Lsection_info:
    .section    __DWARF,__debug_abbrev,regular,debug
Lsection_abbrev:
    .section    __DWARF,__debug_aranges,regular,debug
    .section    __DWARF,__debug_macinfo,regular,debug
    .section    __DWARF,__debug_line,regular,debug
Lsection_line:
    .section    __DWARF,__debug_loc,regular,debug
    .section    __DWARF,__debug_pubtypes,regular,debug
    .section    __DWARF,__debug_str,regular,debug
Lsection_str:
    .section    __DWARF,__debug_ranges,regular,debug
Ldebug_range:
    ## __debug_loc is entered a second time here; only this visit defines a label.
    .section    __DWARF,__debug_loc,regular,debug
Lsection_debug_loc:
    .section    __TEXT,__text,regular,pure_instructions
Ltext_begin:
    .section    __DATA,__data
    .section    __TEXT,__text,regular,pure_instructions
    ## int main(int argc, const char *argv[]) -- returns 42.
    ## Per the DEBUG_VALUE notes below, argc arrives in EDI and argv in RSI;
    ## neither register is read.  The result is left in EAX.
    ## The .cfi_* directives emit call-frame information describing the frame
    ## set up here; the .loc directives tie instructions to lines of file 1.
    ## _main is declared global (.globl) with private-extern visibility.
    .private_extern _main
    .globl  _main
_main:                                  ## @main
    .cfi_startproc
Lfunc_begin0:
    .loc    1 12 0                  ## /Users/####/Desktop/Tiny/Tiny/main.c:12:0
## BB#0:
    ## Prologue: save the caller's frame pointer, then make RBP this frame's base.
    pushq   %rbp
Ltmp2:
    .cfi_def_cfa_offset 16
Ltmp3:
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
Ltmp4:
    .cfi_def_cfa_register %rbp
    ##DEBUG_VALUE: main:argc <- EDI+0
    ##DEBUG_VALUE: main:argv <- RSI+0
    ## Return value: the constant 42 from the C source.
    movl    $42, %eax
    .loc    1 15 5 prologue_end     ## /Users/####/Desktop/Tiny/Tiny/main.c:15:5
Ltmp5:
    ## Epilogue: restore the caller's frame pointer and return.
    popq    %rbp
    ret
Ltmp6:
Lfunc_end0:
    .cfi_endproc

## Marker labels placed at the current end of __text and __data
## (Ltext_end, Ldata_end, Lsection_end1).
Ltext_end:
    .section    __DATA,__data
Ldata_end:
    .section    __TEXT,__text,regular,pure_instructions
Lsection_end1:
    ## __debug_info: DWARF (version 2) description of this compilation unit.
    ## Layout: unit header, then a DW_TAG_compile_unit DIE whose children are
    ## the subprogram "main" (with formal parameters "argc" and "argv") and
    ## the type DIEs they refer to.
    ##
    ## DW_AT_type values are decimal offsets of a DIE within this unit; the
    ## "Abbrev [n] 0x..." annotations give the same offsets in hex:
    ##   101 = 0x65  base type "int"   (Lstring4)
    ##   108 = 0x6c  base type "char"  (Lstring7)
    ##   115 = 0x73  const   -> 0x6c
    ##   120 = 0x78  pointer -> 0x73
    ##   125 = 0x7d  pointer -> 0x78   (pointer to pointer to const char: argv)
    .section    __DWARF,__debug_info,regular,debug
Linfo_begin1:
    ## 127 = 0xb + 0x78 - 4: the unit's size not counting this 4-byte field.
    .long   127                     ## Length of Compilation Unit Info
    .short  2                       ## DWARF version number
Lset0 = Labbrev_begin-Lsection_abbrev   ## Offset Into Abbrev. Section
    .long   Lset0
    .byte   8                       ## Address Size (in bytes)
    .byte   1                       ## Abbrev [1] 0xb:0x78 DW_TAG_compile_unit
Lset1 = Lstring0-Lsection_str           ## DW_AT_producer
    .long   Lset1
    .short  12                      ## DW_AT_language
Lset2 = Lstring1-Lsection_str           ## DW_AT_name
    .long   Lset2
    .quad   0                       ## DW_AT_entry_pc
    .long   0                       ## DW_AT_stmt_list
Lset3 = Lstring2-Lsection_str           ## DW_AT_comp_dir
    .long   Lset3
    .byte   1                       ## DW_AT_APPLE_optimized
    ## Subprogram DIE for "main" (Lstring3); its type 101 is "int".
    .byte   2                       ## Abbrev [2] 0x27:0x3e DW_TAG_subprogram
Lset4 = Lstring3-Lsection_str           ## DW_AT_name
    .long   Lset4
    .byte   1                       ## DW_AT_decl_file
    .byte   11                      ## DW_AT_decl_line
    .byte   1                       ## DW_AT_prototyped
    .long   101                     ## DW_AT_type
    .byte   1                       ## DW_AT_external
    .quad   Lfunc_begin0            ## DW_AT_low_pc
    .quad   Lfunc_end0              ## DW_AT_high_pc
    ## DW_FORM_block1: a 1-byte length (1) followed by the expression byte.
    ## 86 = 0x56 is DW_OP_reg6, i.e. RBP in the x86-64 DWARF register numbering.
    .byte   1                       ## DW_AT_frame_base
    .byte   86
    ## Parameter "argc" (Lstring5), type int, location list Ldebug_loc0.
    .byte   3                       ## Abbrev [3] 0x46:0xf DW_TAG_formal_parameter
Lset5 = Lstring5-Lsection_str           ## DW_AT_name
    .long   Lset5
    .byte   1                       ## DW_AT_decl_file
    .byte   11                      ## DW_AT_decl_line
    .long   101                     ## DW_AT_type
Lset6 = Ldebug_loc0-Lsection_debug_loc  ## DW_AT_location
    .long   Lset6
    ## Parameter "argv" (Lstring6), type at 0x7d, location list Ldebug_loc2.
    .byte   3                       ## Abbrev [3] 0x55:0xf DW_TAG_formal_parameter
Lset7 = Lstring6-Lsection_str           ## DW_AT_name
    .long   Lset7
    .byte   1                       ## DW_AT_decl_file
    .byte   11                      ## DW_AT_decl_line
    .long   125                     ## DW_AT_type
Lset8 = Ldebug_loc2-Lsection_debug_loc  ## DW_AT_location
    .long   Lset8
    .byte   0                       ## End Of Children Mark
    .byte   4                       ## Abbrev [4] 0x65:0x7 DW_TAG_base_type
Lset9 = Lstring4-Lsection_str           ## DW_AT_name
    .long   Lset9
    .byte   5                       ## DW_AT_encoding
    .byte   4                       ## DW_AT_byte_size
    .byte   4                       ## Abbrev [4] 0x6c:0x7 DW_TAG_base_type
Lset10 = Lstring7-Lsection_str          ## DW_AT_name
    .long   Lset10
    .byte   6                       ## DW_AT_encoding
    .byte   1                       ## DW_AT_byte_size
    .byte   5                       ## Abbrev [5] 0x73:0x5 DW_TAG_const_type
    .long   108                     ## DW_AT_type
    .byte   6                       ## Abbrev [6] 0x78:0x5 DW_TAG_pointer_type
    .long   115                     ## DW_AT_type
    .byte   6                       ## Abbrev [6] 0x7d:0x5 DW_TAG_pointer_type
    .long   120                     ## DW_AT_type
    .byte   0                       ## End Of Children Mark
Linfo_end1:
    ## __debug_abbrev: abbreviation table for the "Abbrev [n]" codes used in
    ## __debug_info.  Each entry is: abbreviation code, tag, has-children flag,
    ## then (attribute, form) pairs ended by a 0,0 pair (EOM(1)/EOM(2)).
    ## A final 0 (EOM(3)) ends the table.  Six abbreviations are defined (1-6).
    .section    __DWARF,__debug_abbrev,regular,debug
Labbrev_begin:
    .byte   1                       ## Abbreviation Code
    .byte   17                      ## DW_TAG_compile_unit
    .byte   1                       ## DW_CHILDREN_yes
    .byte   37                      ## DW_AT_producer
    .byte   14                      ## DW_FORM_strp
    .byte   19                      ## DW_AT_language
    .byte   5                       ## DW_FORM_data2
    .byte   3                       ## DW_AT_name
    .byte   14                      ## DW_FORM_strp
    .byte   82                      ## DW_AT_entry_pc
    .byte   1                       ## DW_FORM_addr
    .byte   16                      ## DW_AT_stmt_list
    .byte   6                       ## DW_FORM_data4
    .byte   27                      ## DW_AT_comp_dir
    .byte   14                      ## DW_FORM_strp
    ## Two-byte ULEB128 attribute code: octal \341 \177 = 0xE1 0x7F, which
    ## decodes to 0x3FE1.
    .ascii   "\341\177"             ## DW_AT_APPLE_optimized
    .byte   12                      ## DW_FORM_flag
    .byte   0                       ## EOM(1)
    .byte   0                       ## EOM(2)
    .byte   2                       ## Abbreviation Code
    .byte   46                      ## DW_TAG_subprogram
    .byte   1                       ## DW_CHILDREN_yes
    .byte   3                       ## DW_AT_name
    .byte   14                      ## DW_FORM_strp
    .byte   58                      ## DW_AT_decl_file
    .byte   11                      ## DW_FORM_data1
    .byte   59                      ## DW_AT_decl_line
    .byte   11                      ## DW_FORM_data1
    .byte   39                      ## DW_AT_prototyped
    .byte   12                      ## DW_FORM_flag
    .byte   73                      ## DW_AT_type
    .byte   19                      ## DW_FORM_ref4
    .byte   63                      ## DW_AT_external
    .byte   12                      ## DW_FORM_flag
    .byte   17                      ## DW_AT_low_pc
    .byte   1                       ## DW_FORM_addr
    .byte   18                      ## DW_AT_high_pc
    .byte   1                       ## DW_FORM_addr
    .byte   64                      ## DW_AT_frame_base
    .byte   10                      ## DW_FORM_block1
    .byte   0                       ## EOM(1)
    .byte   0                       ## EOM(2)
    .byte   3                       ## Abbreviation Code
    .byte   5                       ## DW_TAG_formal_parameter
    .byte   0                       ## DW_CHILDREN_no
    .byte   3                       ## DW_AT_name
    .byte   14                      ## DW_FORM_strp
    .byte   58                      ## DW_AT_decl_file
    .byte   11                      ## DW_FORM_data1
    .byte   59                      ## DW_AT_decl_line
    .byte   11                      ## DW_FORM_data1
    .byte   73                      ## DW_AT_type
    .byte   19                      ## DW_FORM_ref4
    .byte   2                       ## DW_AT_location
    .byte   6                       ## DW_FORM_data4
    .byte   0                       ## EOM(1)
    .byte   0                       ## EOM(2)
    .byte   4                       ## Abbreviation Code
    .byte   36                      ## DW_TAG_base_type
    .byte   0                       ## DW_CHILDREN_no
    .byte   3                       ## DW_AT_name
    .byte   14                      ## DW_FORM_strp
    .byte   62                      ## DW_AT_encoding
    .byte   11                      ## DW_FORM_data1
    .byte   11                      ## DW_AT_byte_size
    .byte   11                      ## DW_FORM_data1
    .byte   0                       ## EOM(1)
    .byte   0                       ## EOM(2)
    .byte   5                       ## Abbreviation Code
    .byte   38                      ## DW_TAG_const_type
    .byte   0                       ## DW_CHILDREN_no
    .byte   73                      ## DW_AT_type
    .byte   19                      ## DW_FORM_ref4
    .byte   0                       ## EOM(1)
    .byte   0                       ## EOM(2)
    .byte   6                       ## Abbreviation Code
    .byte   15                      ## DW_TAG_pointer_type
    .byte   0                       ## DW_CHILDREN_no
    .byte   73                      ## DW_AT_type
    .byte   19                      ## DW_FORM_ref4
    .byte   0                       ## EOM(1)
    .byte   0                       ## EOM(2)
    .byte   0                       ## EOM(3)
Labbrev_end:
    ## __apple_names: DWARF accelerator (hash) table from names to DIE offsets.
    ## The header below belongs to this table, not to Mach-O.
    ## Magic 1212240712 = 0x48415348, the ASCII bytes "HASH".
    ## Header data length 12 = die-offset base (4) + atom count (4) + one
    ## atom descriptor (2 + 2).
    ## One bucket, one hash: the only entry is "main" (Lstring3).
    .section    __DWARF,__apple_names,regular,debug
Lnames_begin:
    .long   1212240712              ## Header Magic
    .short  1                       ## Header Version
    .short  0                       ## Header Hash Function
    .long   1                       ## Header Bucket Count
    .long   1                       ## Header Hash Count
    .long   12                      ## Header Data Length
    .long   0                       ## HeaderData Die Offset Base
    .long   1                       ## HeaderData Atom Count
    .short  1                       ## eAtomTypeDIEOffset
    .short  6                       ## DW_FORM_data4
    .long   0                       ## Bucket 0
    .long   2090499946              ## Hash in Bucket 0
    .long   LNames0-Lnames_begin    ## Offset in Bucket 0
LNames0:
Lset11 = Lstring3-Lsection_str          ## main
    .long   Lset11
    .long   1                       ## Num DIEs
    ## 39 = 0x27: offset of the DW_TAG_subprogram DIE in __debug_info.
    .long   39
    ## Trailing 0: presumably the end-of-chain marker for this bucket -- confirm
    ## against the accelerator table format description.
    .long   0
    ## __apple_objc: accelerator table with the same header layout as
    ## __apple_names (magic 1212240712 = 0x48415348, ASCII "HASH").
    ## It is empty here: hash count is 0 and the single bucket holds -1.
    .section    __DWARF,__apple_objc,regular,debug
Lobjc_begin:
    .long   1212240712              ## Header Magic
    .short  1                       ## Header Version
    .short  0                       ## Header Hash Function
    .long   1                       ## Header Bucket Count
    .long   0                       ## Header Hash Count
    .long   12                      ## Header Data Length
    .long   0                       ## HeaderData Die Offset Base
    .long   1                       ## HeaderData Atom Count
    .short  1                       ## eAtomTypeDIEOffset
    .short  6                       ## DW_FORM_data4
    .long   -1                      ## Bucket 0
    ## __apple_namespac: accelerator table header (magic 1212240712 =
    ## 0x48415348, ASCII "HASH") with no entries: hash count is 0 and the
    ## single bucket holds -1.
    .section    __DWARF,__apple_namespac,regular,debug
Lnamespac_begin:
    .long   1212240712              ## Header Magic
    .short  1                       ## Header Version
    .short  0                       ## Header Hash Function
    .long   1                       ## Header Bucket Count
    .long   0                       ## Header Hash Count
    .long   12                      ## Header Data Length
    .long   0                       ## HeaderData Die Offset Base
    .long   1                       ## HeaderData Atom Count
    .short  1                       ## eAtomTypeDIEOffset
    .short  6                       ## DW_FORM_data4
    .long   -1                      ## Bucket 0
    ## __apple_types: accelerator (hash) table from type names to DIEs.
    ## Magic 1212240712 = 0x48415348, the ASCII bytes "HASH".
    ## Two buckets, two hashes.  Each DIE record carries three atoms, as
    ## declared in the header: DIE offset (data4), tag (data2), type flags
    ## (data1).  Header data length 20 = 4 + 4 + 3 * (2 + 2).
    .section    __DWARF,__apple_types,regular,debug
Ltypes_begin:
    .long   1212240712              ## Header Magic
    .short  1                       ## Header Version
    .short  0                       ## Header Hash Function
    .long   2                       ## Header Bucket Count
    .long   2                       ## Header Hash Count
    .long   20                      ## Header Data Length
    .long   0                       ## HeaderData Die Offset Base
    .long   3                       ## HeaderData Atom Count
    .short  1                       ## eAtomTypeDIEOffset
    .short  6                       ## DW_FORM_data4
    .short  3                       ## eAtomTypeTag
    .short  5                       ## DW_FORM_data2
    .short  5                       ## eAtomTypeTypeFlags
    .short  11                      ## DW_FORM_data1
    .long   0                       ## Bucket 0
    .long   1                       ## Bucket 1
    .long   193495088               ## Hash in Bucket 0
    .long   2090147939              ## Hash in Bucket 1
    .long   Ltypes0-Ltypes_begin    ## Offset in Bucket 0
    .long   Ltypes1-Ltypes_begin    ## Offset in Bucket 1
    ## "int": DIE offset 101 (0x65), tag 36 (DW_TAG_base_type), flags 0.
Ltypes0:
Lset12 = Lstring4-Lsection_str          ## int
    .long   Lset12
    .long   1                       ## Num DIEs
    .long   101
    .short  36
    .byte   0
    .long   0
    ## "char": DIE offset 108 (0x6c), tag 36 (DW_TAG_base_type), flags 0.
Ltypes1:
Lset13 = Lstring7-Lsection_str          ## char
    .long   Lset13
    .long   1                       ## Num DIEs
    .long   108
    .short  36
    .byte   0
    .long   0
    ## __debug_pubtypes: header only.  It holds its own length, the DWARF
    ## version, the offset and length of the compilation unit in __debug_info,
    ## and then the end mark, with no type entries in between.
    .section    __DWARF,__debug_pubtypes,regular,debug
Lset14 = Lpubtypes_end1-Lpubtypes_begin1 ## Length of Public Types Info
    .long   Lset14
Lpubtypes_begin1:
    .short  2                       ## DWARF Version
Lset15 = Linfo_begin1-Lsection_info     ## Offset of Compilation Unit Info
    .long   Lset15
Lset16 = Linfo_end1-Linfo_begin1        ## Compilation Unit Length
    .long   Lset16
    .long   0                       ## End Mark
Lpubtypes_end1:
    ## __debug_loc: location lists referenced by the DW_AT_location attributes
    ## of the two formal parameters (Ldebug_loc0 for argc, Ldebug_loc2 for argv).
    ## Each entry is: start address, end address, 2-byte expression size, then
    ## the expression bytes.  A pair of zero quads ends each list.
    ## Both entries cover Lfunc_begin0..Ltmp6, i.e. all of _main.
    .section    __DWARF,__debug_loc,regular,debug
    ## argc: DW_OP_reg5, matching the "argc <- EDI" DEBUG_VALUE note in _main.
Ldebug_loc0:
    .quad   Lfunc_begin0
    .quad   Ltmp6
Lset17 = Ltmp8-Ltmp7                    ## Loc expr size
    .short  Lset17
Ltmp7:
    .byte   85                      ## DW_OP_reg5
Ltmp8:
    .quad   0
    .quad   0
    ## argv: DW_OP_reg4, matching the "argv <- RSI" DEBUG_VALUE note in _main.
Ldebug_loc2:
    .quad   Lfunc_begin0
    .quad   Ltmp6
Lset18 = Ltmp10-Ltmp9                   ## Loc expr size
    .short  Lset18
Ltmp9:
    .byte   84                      ## DW_OP_reg4
Ltmp10:
    .quad   0
    .quad   0
    ## End label; nothing in this listing refers to it.
Ldebug_loc4:
    ## __debug_aranges, __debug_ranges and __debug_macinfo are entered again
    ## here but nothing is emitted into them.
    .section    __DWARF,__debug_aranges,regular,debug
    .section    __DWARF,__debug_ranges,regular,debug
    .section    __DWARF,__debug_macinfo,regular,debug
    ## __debug_inlined: only a header (length, DWARF version, address size).
    .section    __DWARF,__debug_inlined,regular,debug
Lset19 = Ldebug_inlined_end1-Ldebug_inlined_begin1 ## Length of Debug Inlined Information Entry
    .long   Lset19
Ldebug_inlined_begin1:
    .short  2                       ## Dwarf Version
    .byte   8                       ## Address Size (in bytes)
Ldebug_inlined_end1:
    ## __debug_str: NUL-terminated strings referenced from __debug_info and
    ## the __apple_* tables as offsets from Lsection_str (DW_FORM_strp).
    ## Lstring0 = producer, Lstring1 = source file, Lstring2 = compilation
    ## directory; Lstring3..Lstring7 are the names of main, int, argc, argv
    ## and char.
    .section    __DWARF,__debug_str,regular,debug
Lstring0:
    .asciz   "Apple clang version 4.1 (tags/Apple/clang-421.11.66) (based on LLVM 3.1svn)"
Lstring1:
    .asciz   "/Users/####/Desktop/Tiny/Tiny/main.c"
Lstring2:
    .asciz   "/Users/####/Desktop/Tiny"
Lstring3:
    .asciz   "main"
Lstring4:
    .asciz   "int"
Lstring5:
    .asciz   "argc"
Lstring6:
    .asciz   "argv"
Lstring7:
    .asciz   "char"

## Mach-O assembler directive: tells the linker that the sections of this
## file may be divided into blocks at symbol boundaries.
.subsections_via_symbols

1 个答案:

答案 0 :(得分:13)

  

首先,为什么在C可执行文件的发布版本中包含了如此多的DWARF调试信息?

能够调试优化后的代码非常有用。有些错误只会在优化版本中出现,这种情况并不罕见。如果您是在编写汇编代码,那么您不太可能关心DWARF信息,因此我建议您在构建用于对比的代码时不要使用-g参数。


  

其次,我注意到Mach-O头数据被包含4次(在不同的DWARF相关部分中)。为什么这有必要?

您看到的这些并不是Mach-O头。它们是DWARF加速表(DWARF accelerator tables)的头部;加速表是LLVM对DWARF的扩展,用于加快“某个符号是否在给定的编译单元中定义”的判断。


  

但在248B程序中,这些都无处可见——程序改为从_start开始。如果按照定义所有程序都以main开头,那怎么可能呢?

历史上,在OS X上,所有程序都从start开始。但是,此符号通常来自系统库,而不是由程序本身定义。 start的系统实现将执行一些初始化,然后跳转到您的程序“真实”入口点。

Mach-O二进制文件的入口点由LC_UNIXTHREAD或LC_MAIN加载命令定义。当LC_UNIXTHREAD(OS X 10.8之前版本的约定)与常规的C或C++程序一起使用时,链接器使用start作为入口点。该符号通常来自/usr/lib/crt1.o,其地址被写入LC_UNIXTHREAD加载命令的指令指针字段。您链接的那个248B二进制文件包含一个LC_UNIXTHREAD命令,其中eip被设置为0x000010e8,这正是符号_start的地址。由于这个小程序是静态可执行文件,而且二进制文件是直接生成的,它可以把任意想要的地址写入加载命令的指令指针字段。

如果您构建的是面向OS X 10.8+的可执行文件,链接器会生成LC_MAIN加载命令而不是LC_UNIXTHREAD。内核知道,对于使用LC_MAIN命令的二进制文件,应当先加载动态链接器并跳转到动态链接器的入口点来执行。动态链接器dyld完成自身初始化后,再跳转到LC_MAIN命令中指定的地址。在这个全新的世界里,根本不会用到名为start的符号。