Question

请考虑以下C代码：

/*
 * Reads the int stored at ptr[j + 1] and uses it as the start value of a
 * counter. A copy of j is shifted right by one bit for every step the
 * counter needs to reach 8; that shifted copy is returned. If the loaded
 * value is already 8 or more, j is returned unchanged.
 */
unsigned int func(int *ptr, unsigned int j)
{
    int i = ptr[j + 1]; // <== HERE
    unsigned int res = j;

    while (i < 8) {
        res >>= 1;
        ++i;
    }

    return res;
}

已知j位于%ecx、ptr位于%eax，那么把ptr[j+1]复制到%ecx的汇编指令是什么？更重要的是，为什么？

谢谢。

Answer 1

使用

gcc -g bla.c -c -o bla.o
objdump -dS bla.o

我得到了

[...]
unsigned int func(int *ptr, unsigned int j)
{
0:   55                      push   %rbp
1:   48 89 e5                mov    %rsp,%rbp
4:   48 89 7d e8             mov    %rdi,-0x18(%rbp)
8:   89 75 e4                mov    %esi,-0x1c(%rbp)
 unsigned int res = j;
b:   8b 45 e4                mov    -0x1c(%rbp),%eax
e:   89 45 fc                mov    %eax,-0x4(%rbp)
 int i = ptr[j+1]; // <== HERE
11:   8b 45 e4                mov    -0x1c(%rbp),%eax
14:   83 c0 01                add    $0x1,%eax
17:   89 c0                   mov    %eax,%eax
19:   48 8d 14 85 00 00 00    lea    0x0(,%rax,4),%rdx
20:   00 
21:   48 8b 45 e8             mov    -0x18(%rbp),%rax
25:   48 01 d0                add    %rdx,%rax
28:   8b 00                   mov    (%rax),%eax
2a:   89 45 f8                mov    %eax,-0x8(%rbp)

  for(; i<8; ++i)
2d:   eb 07                   jmp    36 <func+0x36>
      res >>= 1;
2f:   d1 6d fc                shrl   -0x4(%rbp)
[...]

从这里我们可以看到各变量在栈帧中的位置如下：

res :  -0x4(%rbp)
i   :  -0x8(%rbp)
ptr : -0x18(%rbp)
j   : -0x1c(%rbp)

所以指令序列是

mov    -0x1c(%rbp),%eax       %eax = j
add    $0x1,%eax              %eax++                 = j+1
mov    %eax,%eax              zero-extend %eax to 64 bits (%rax)
lea    0x0(,%rax,4),%rdx      %rdx = %rax*4     (4 = sizeof(int))
mov    -0x18(%rbp),%rax       %rax = ptr
add    %rdx,%rax              %rax = %rax + %rdx     = ptr + (j+1)
mov    (%rax),%eax            %eax = *(%rax)         = *(ptr + (j+1)) = ptr[j+1]
mov    %eax,-0x8(%rbp)        i = %eax

这会回答你的问题吗？

Answer 2

由于您使用的是AT&T的“%reg”记法，我假设您是在IA32（386+）上使用GCC。

首先——我不确定为什么您会让j位于%ecx、ptr位于%eax。例如，按照ELF i386 ABI，在函数入口处ptr位于栈上的4(%esp)，j位于8(%esp)。让我们用真实的-O2 -march=i386优化（针对叶子函数）进行编译，并只留下相关部分：

func:
        movl    8(%esp), %eax
        movl    4(%esp), %edx
        movl    4(%edx,%eax,4), %edx
        cmpl    $7, %edx
        jg      .L2
        .p2align 2,,3
.L3:
        shrl    %eax
        incl    %edx
        cmpl    $8, %edx
        jne     .L3
.L2:
        ret

您将看到编译器已生成一条高效的mov指令！其一般形式为：

movl offset(base, index, scale), dest

对照这一形式，您可以验证这条指令就是mov 4(ptr, j, 4), dest——即把地址ptr + j * 4 + 4处的内存加载到dest。一个[unsigned] int占4个字节，因此这就是ptr[j + 1]。在这种情况下，编译器已将dest分配为%edx，其余的汇编代码您可以自行阅读。

现在——我们可以借助GCC的内联汇编（inline assembly）来强制指定寄存器：j位于%ecx，ptr位于%eax，并且我们将重用%eax作为目的寄存器——因为此后我们不再关心ptr……

/*
 * Variant of func that performs the ptr[j+1] load with GCC extended inline
 * assembly (x86, AT&T syntax) so that the operands sit in chosen registers.
 * The loaded value seeds i; res (a copy of j) is shifted right by one bit
 * per loop iteration while i < 8, and res is returned.
 */
unsigned int func(int *ptr, unsigned int j)
{
    unsigned int res = j;
    int i;

    /*
     * Constraints: "=a" places the output i in %eax, "c" places j in %ecx,
     * and "0" puts ptr in the same register as operand 0 (%eax), so the
     * load overwrites ptr with the loaded value.
     * Effective address of the load: 4 + %eax + %ecx*4.
     * NOTE(review): the asm reads memory through ptr but declares neither an
     * "m" input operand nor a "memory" clobber -- confirm this is acceptable.
     */
    __asm__ ("movl 4(%%eax,%%ecx,4), %%eax" /* OR: "movl 4(%0,%1,4), %0" */
             : "=a" (i) : "c" (j), "0" (ptr));

    for(; i<8; ++i)
        res >>= 1;

    return res;
}

GCC（"-O2 -march=i386"）生成：

func:
        movl    8(%esp), %ecx           # %ecx = j (second stack argument)
        movl    4(%esp), %eax           # %eax = ptr (first stack argument)
        movl 4(%eax,%ecx,4), %eax       # %eax = *(ptr + j*4 + 4), i.e. i = ptr[j+1]
        cmpl    $7, %eax
        jg      .L2                     # i > 7: skip the loop entirely
        .p2align 2,,3
.L3:
        shrl    %ecx                    # res >>= 1 (res lives in %ecx, which held j)
        incl    %eax                    # ++i
        cmpl    $8, %eax
        jne     .L3                     # loop until i == 8
.L2:
        movl    %ecx, %eax              # move res into %eax as the return value
        ret

请注意，在循环中j一直保留在%ecx中，i保留在%eax中。调用约定要求由%eax保存返回值，因此才有movl %ecx, %eax这条指令。若改用%ecx作为dest，效率会更低，因为我们希望在循环中保留res = j。

因此，编译器生成的代码略好于使用“假定”寄存器的代码。这并不奇怪，因为这种寄存器分配和传播是现代编译器做得很好的事情。

超级简单的汇编指令

2 个答案: