将汇编代码转换为伪代码

时间:2012-10-27 05:41:43

标签: c assembly reverse-engineering pseudocode disassembly

我正在做一个家庭作业项目,其中包含一个用 C 编写并已编译的“炸弹”,我必须对它进行逆向工程,找出 5 个字符串来解除炸弹的五个“阶段”。我现在卡在第 3 阶段,正尝试翻译 gdb 为该函数生成的汇编代码(x86,我相信是 AT&T 语法)。到目前为止我能弄清楚的是,它读取用户输入的一串六个数字,并根据某些标准对它们进行判断,但再往下我就看不懂了。该函数如下(我在旁边写上了自己尝试转换的伪代码)。

0x08048816 <phase_3+0>: push   %ebp
0x08048817 <phase_3+1>: mov    %esp,%ebp
0x08048819 <phase_3+3>: push   %edi
0x0804881a <phase_3+4>: push   %ebx
0x0804881b <phase_3+5>: sub    $0x30,%esp
0x0804881e <phase_3+8>: lea    -0x24(%ebp),%eax                             
0x08048821 <phase_3+11>:    mov    %eax,0x4(%esp)                           
0x08048825 <phase_3+15>:    mov    0x8(%ebp),%eax                           
0x08048828 <phase_3+18>:    mov    %eax,(%esp)                              
0x0804882b <phase_3+21>:    call   0x8048d2c <read_six_numbers>
0x08048830 <phase_3+26>:    mov    -0x24(%ebp),%eax                     eax = p1
0x08048833 <phase_3+29>:    cmp    $0x1,%eax                             if eax != 1
0x08048836 <phase_3+32>:    je     0x804883d <phase_3+39>                   explode bomb
0x08048838 <phase_3+34>:    call   0x8048fec <explode_bomb>             else
0x0804883d <phase_3+39>:    movl   $0x1,-0xc(%ebp)                       ebp[-12] = 1
0x08048844 <phase_3+46>:    jmp    0x804888a <phase_3+116>              while ebp[-12] < 5 {
0x08048846 <phase_3+48>:    mov    -0xc(%ebp),%eax                          eax = ebp[-12]
0x08048849 <phase_3+51>:    mov    -0x24(%ebp,%eax,4),%eax                  {magic}
0x0804884d <phase_3+55>:    mov    %eax,%ebx                                ebx = eax
0x0804884f <phase_3+57>:    mov    -0xc(%ebp),%eax                          eax = ebp[-12]
0x08048852 <phase_3+60>:    sub    $0x1,%eax                                 eax -= 1
0x08048855 <phase_3+63>:    mov    -0x24(%ebp,%eax,4),%eax                  {magic}
0x08048859 <phase_3+67>:    mov    %eax,%edx                                edx = eax
0x0804885b <phase_3+69>:    mov    0x804a6d8,%eax                           eax = 0x804a6d8
0x08048860 <phase_3+74>:    mov    $0xffffffff,%ecx                      ecx = 255
0x08048865 <phase_3+79>:    mov    %eax,-0x2c(%ebp)                         ebp[-12] = eax
0x08048868 <phase_3+82>:    mov    $0x0,%eax                                 eax = 0
0x0804886d <phase_3+87>:    cld                         
0x0804886e <phase_3+88>:    mov    -0x2c(%ebp),%edi                         edi = ebp[-12]
0x08048871 <phase_3+91>:    repnz scas %es:(%edi),%al                       {deep magic}
0x08048873 <phase_3+93>:    mov    %ecx,%eax                                eax = ecx
0x08048875 <phase_3+95>:    not    %eax                                     eax = -eax
0x08048877 <phase_3+97>:    sub    $0x1,%eax                                 eax -= 1
0x0804887a <phase_3+100>:   imul   %edx,%eax                                eax *= edx
0x0804887d <phase_3+103>:   cmp    %eax,%ebx                                if (eax != ebx)
0x0804887f <phase_3+105>:   je     0x8048886 <phase_3+112>                      explode_bomb
0x08048881 <phase_3+107>:   call   0x8048fec <explode_bomb>                 else
0x08048886 <phase_3+112>:   addl   $0x1,-0xc(%ebp)                           ebp[-12] += 1
0x0804888a <phase_3+116>:   cmpl   $0x5,-0xc(%ebp)
0x0804888e <phase_3+120>:   jle    0x8048846 <phase_3+48>               }
0x08048890 <phase_3+122>:   add    $0x30,%esp
0x08048893 <phase_3+125>:   pop    %ebx
0x08048894 <phase_3+126>:   pop    %edi
0x08048895 <phase_3+127>:   pop    %ebp
0x08048896 <phase_3+128>:   ret

我对其中大部分内容至少还有一点把握(尽管不多);我可以肯定自己弄错的,是目前标记为“magic”的三行——phase_3+51、phase_3+63 和 phase_3+91(两条语法奇怪的 mov 指令,以及带 repnz 的那一行)。这种语法我见得不多,也想不出该用什么关键词去搜索它们。

对我的尝试有什么一般性的(和/或严厉的)批评吗?有没有我明显搞错的地方?当然,由于这是家庭作业,我不需要别人直接给我答案;我只想知道我的解读是否合理(以及让我困惑的那三行是什么意思)。

非常感谢您的帮助!

**编辑**

read_six_numbers函数反汇编如下:

0x08048d2c <read_six_numbers+0>:    push   %ebp
0x08048d2d <read_six_numbers+1>:    mov    %esp,%ebp
0x08048d2f <read_six_numbers+3>:    push   %esi
0x08048d30 <read_six_numbers+4>:    push   %ebx
0x08048d31 <read_six_numbers+5>:    sub    $0x30,%esp
0x08048d34 <read_six_numbers+8>:    mov    0xc(%ebp),%eax
0x08048d37 <read_six_numbers+11>:   add    $0x14,%eax
0x08048d3a <read_six_numbers+14>:   mov    0xc(%ebp),%edx
0x08048d3d <read_six_numbers+17>:   add    $0x10,%edx
0x08048d40 <read_six_numbers+20>:   mov    0xc(%ebp),%ecx
0x08048d43 <read_six_numbers+23>:   add    $0xc,%ecx
0x08048d46 <read_six_numbers+26>:   mov    0xc(%ebp),%ebx
0x08048d49 <read_six_numbers+29>:   add    $0x8,%ebx
0x08048d4c <read_six_numbers+32>:   mov    0xc(%ebp),%esi
0x08048d4f <read_six_numbers+35>:   add    $0x4,%esi
0x08048d52 <read_six_numbers+38>:   mov    %eax,0x1c(%esp)
0x08048d56 <read_six_numbers+42>:   mov    %edx,0x18(%esp)
0x08048d5a <read_six_numbers+46>:   mov    %ecx,0x14(%esp)
0x08048d5e <read_six_numbers+50>:   mov    %ebx,0x10(%esp)
0x08048d62 <read_six_numbers+54>:   mov    %esi,0xc(%esp)
0x08048d66 <read_six_numbers+58>:   mov    0xc(%ebp),%eax
0x08048d69 <read_six_numbers+61>:   mov    %eax,0x8(%esp)
0x08048d6d <read_six_numbers+65>:   movl   $0x804965d,0x4(%esp)
0x08048d75 <read_six_numbers+73>:   mov    0x8(%ebp),%eax
0x08048d78 <read_six_numbers+76>:   mov    %eax,(%esp)
0x08048d7b <read_six_numbers+79>:   call   0x80485a4 <sscanf@plt>
0x08048d80 <read_six_numbers+84>:   mov    %eax,-0xc(%ebp)
0x08048d83 <read_six_numbers+87>:   cmpl   $0x5,-0xc(%ebp)
0x08048d87 <read_six_numbers+91>:   jg     0x8048d8e <read_six_numbers+98>
0x08048d89 <read_six_numbers+93>:   call   0x8048fec <explode_bomb>
0x08048d8e <read_six_numbers+98>:   add    $0x30,%esp
0x08048d91 <read_six_numbers+101>:  pop    %ebx
0x08048d92 <read_six_numbers+102>:  pop    %esi
0x08048d93 <read_six_numbers+103>:  pop    %ebp
0x08048d94 <read_six_numbers+104>:  ret    

1 个答案:

答案 0 :(得分:7)

mov    -0x24(%ebp,%eax,4),%eax

上述指令是在访问数组的一个元素。这在 x86 中称为 SIB 寻址,即 Scale(比例)、Index(索引)、Base(基址),此外还有一个偏移量(Offset)部分。数组的起始地址由基址寄存器(此处为 EBP)加上偏移量确定(使用帧指针时,包括数组在内的局部变量都以相对于帧指针的偏移量来寻址)。元素编号位于索引寄存器(此处为 EAX)中,每个元素的大小由比例因子(此处为 4)确定。

mov    0x804a6d8,%eax
mov    $0xffffffff,%ecx
mov    %eax,-0x2c(%ebp)
mov    $0x0,%eax
cld                         
mov    -0x2c(%ebp),%edi
repnz scas %es:(%edi),%al
mov    %ecx,%eax
not    %eax
sub    $0x1,%eax

这其实就是 strlen(0x804a6d8)。ES:EDI 指向位于 0x804a6d8 处的待扫描字符串(扫描时逐字节与参考字节 AL 比较)。严格地说,`mov 0x804a6d8,%eax` 没有 `$` 前缀,因此它是从地址 0x804a6d8 处读取字符串指针。AL 包含要查找的字符:0,即 ASCII NUL。cld 设置扫描方向:升序(std 会使扫描降序)。ECX 被初始化为 ~0 = -1(所有位均为 1)。repnz 重复执行 scas(SCAN STRING)指令,每次将 ECX 减 1;只要 ECX 不为零(这不会发生,因为 ECX 足够大)且扫描尚未成功(NZ:字符串中的字节与参考值 AL 比较后未设置零标志),就继续重复。结束后,ECX 包含 -1 - (扫描步数);NOT 得到 (扫描步数);SUB 得到 (扫描步数) - 1 = (不含终止 NUL 的字符串长度)。另见 http://www.int80h.org/strlen/ 的解释。