解释汇编函数行为的建议

时间:2017-11-15 03:20:51

标签: assembly 64-bit intel

我对装配比较陌生,遇到了让我难过的功能。在调试过程中,我发现$ rdi寄存器最初设置为0x55555575a1b8。基本上,我希望返回任何值,但负值或零。最初,似乎将$ esi设置为0x5575a1b8将返回0x1。实际上它返回-1。进一步检查表明,有两个递归函数调用,每个调用都将返回值保存到寄存器中以供以后比较:

r12 = Find(*(rdi + 0x8), rsi);
rax = Find(*(rdi + 0x10), rsi);

在此之后,我迷失在代码中......

0000000000001b62 <Find>:
1b62:   48 85 ff                test   %rdi,%rdi
1b65:   74 3c                   je     1ba3 <Find+0x41>
1b67:   39 37                   cmp    %esi,(%rdi)
1b69:   74 3e                   je     1ba9 <Find+0x47>
1b6b:   41 54                   push   %r12
1b6d:   55                      push   %rbp
1b6e:   53                      push   %rbx
1b6f:   89 f5                   mov    %esi,%ebp
1b71:   48 89 fb                mov    %rdi,%rbx
1b74:   48 8b 7f 08             mov    0x8(%rdi),%rdi
1b78:   e8 e5 ff ff ff          callq  1b62 <Find>
1b7d:   41 89 c4                mov    %eax,%r12d
1b80:   48 8b 7b 10             mov    0x10(%rbx),%rdi
1b84:   89 ee                   mov    %ebp,%esi
1b86:   e8 d7 ff ff ff          callq  1b62 <Find>
1b8b:   45 85 e4                test   %r12d,%r12d
1b8e:   7e 09                   jle    1b99 <Find+0x37>
1b90:   43 8d 04 24             lea    (%r12,%r12,1),%eax
1b94:   5b                      pop    %rbx
1b95:   5d                      pop    %rbp
1b96:   41 5c                   pop    %r12
1b98:   c3                      retq   
1b99:   85 c0                   test   %eax,%eax
1b9b:   7e 12                   jle    1baf <Find+0x4d>
1b9d:   8d 44 00 01             lea    0x1(%rax,%rax,1),%eax
1ba1:   eb f1                   jmp    1b94 <Find+0x32>
1ba3:   b8 ff ff ff ff          mov    $0xffffffff,%eax
1ba8:   c3                      retq   
1ba9:   b8 01 00 00 00          mov    $0x1,%eax
1bae:   c3                      retq   
1baf:   b8 ff ff ff ff          mov    $0xffffffff,%eax
1bb4:   eb de                   jmp    1b94 <Find+0x32>

非常感谢对该方法行为的任何了解。

1 个答案:

答案 0 :(得分:2)

0000000000001b62 <Find>:
; if (null == rdi) return -1;
1b62:   48 85 ff                test   %rdi,%rdi
1b65:   74 3c                   je     1ba3 <Find+0x41>
; if (esi == *rdi) return 1;
1b67:   39 37                   cmp    %esi,(%rdi)
1b69:   74 3e                   je     1ba9 <Find+0x47>
; preserving r12, rbp, rbx
1b6b:   41 54                   push   %r12
1b6d:   55                      push   %rbp
1b6e:   53                      push   %rbx
; r12d = Find( *(rdi+8), esi );
1b6f:   89 f5                   mov    %esi,%ebp
1b71:   48 89 fb                mov    %rdi,%rbx
1b74:   48 8b 7f 08             mov    0x8(%rdi),%rdi
1b78:   e8 e5 ff ff ff          callq  1b62 <Find>
1b7d:   41 89 c4                mov    %eax,%r12d
; eax = Find( *(rdi_orig+16), esi_orig)
1b80:   48 8b 7b 10             mov    0x10(%rbx),%rdi
1b84:   89 ee                   mov    %ebp,%esi
1b86:   e8 d7 ff ff ff          callq  1b62 <Find>
; if (r12d <= 0 && eax <= 0) return -1; (with restoring regs)
; if (r12d <= 0 && 0 < eax) return 2*eax + 1; (with restoring regs)
1b8b:   45 85 e4                test   %r12d,%r12d
1b8e:   7e 09                   jle    1b99 <Find+0x37>
; return 2*r12d; with restoring regs
1b90:   43 8d 04 24             lea    (%r12,%r12,1),%eax
1b94:   5b                      pop    %rbx
1b95:   5d                      pop    %rbp
1b96:   41 5c                   pop    %r12
1b98:   c3                      retq   
1b99:   85 c0                   test   %eax,%eax
1b9b:   7e 12                   jle    1baf <Find+0x4d>
1b9d:   8d 44 00 01             lea    0x1(%rax,%rax,1),%eax
1ba1:   eb f1                   jmp    1b94 <Find+0x32>
1ba3:   b8 ff ff ff ff          mov    $0xffffffff,%eax
1ba8:   c3                      retq   
1ba9:   b8 01 00 00 00          mov    $0x1,%eax
1bae:   c3                      retq   
1baf:   b8 ff ff ff ff          mov    $0xffffffff,%eax
1bb4:   eb de                   jmp    1b94 <Find+0x32>

所以在类似C的代码中我认为(没有验证或调试,甚至只是编译):

struct node_s {
    int32_t value;
    int32_t _padding; // left starts at offset 8
    node_s* left;
    node_s* right;
};

int32_t Find(node_s* node, int32_t i) {
    if (nullptr == node) return -1;
    if (i == node->value) return 1;
    int32_t f1 = Find( node->left, i);
    int32_t f2 = Find( node->right, i);
    if (0 < f1) return 2 * f1;
    if (0 < f2) return 2 * f2 + 1;
    return -1;
}

在我看来,这是在二叉树中返回特定值的位置的未经优化的方式。

假设你有这样的树:

n0: {10, n1, n2}
n1: {11, n3, n4}
n2: {12, n5, n6}
n3: {13, nullptr, n7}
n4: {14, nullptr, nullptr}
n5: {15, nullptr, n8}
n6: {16, nullptr, nullptr}
n7: {17, nullptr, nullptr}
n8: {18, nullptr, nullptr}

           10
        /      \
     11            12
  /    \        /    \
13      14    15      16
  \             \
   17            18

然后我想:

Find(n0, 99) = -1
Find(n0, 10) = 1
Find(n0, 11) = 2
Find(n0, 12) = 3
Find(n1, 11) = 1
Find(n0, 17) = 12
Find(n1, 17) = 6
Find(n3, 17) = 3
Find(n7, 17) = 1
...

或者从n0节点搜索特定值时,返回值将是(我希望我没有做错):

           1
        /      \
     2             3
  /    \        /    \
 4       6     5       7
  \             \
   12             13

所以它返回整个“楼层”(链接深度),因为某些范围 i .. i + f_n-1 (f_n =楼层上最大节点的数量)在BT)中,其中前半部分是左节点,后半部分是右节点。

现在,如果您在机器中有该代码,您可以尝试重新创建这样的树形结构以供输入并让我知道,如果我确实猜错..:)

编辑:不,没有。那个底部将是12,14,而不是12,13,所以这个顺序比漂亮的左/右分裂更棘手......太累了想不出如何命名。当你有从root到value的路径时,看起来像某种词典排序(但是排序类似于反向字符串,或者是从值到顶部的路径):

1 : ""
2 : L
3 : R
4 : LL
5 : RL
6 : LR
7 : RR
8 : LLL
9 : RLL
10: LRL
11: RRL
12: LLR
13: RLR
14: LRR
15: RRR
16: LLLL
17: RLLL
18: LRLL
...