在ARM MCU上用更小的代码替换memcpy和.divsi3_skip_div0_test

时间:2016-12-22 12:05:41

标签: arm libc

我提交给 https://hackaday.com/2016/11/21/step-up-to-the-1-kb-challenge/ 的参赛作品中包含一些体积很大的函数,而这些函数并不是由我编写的任何C代码生成的。

000004e4 <.divsi3_skip_div0_test>:
 4e4:   b410        push    {r4}
 4e6:   1c04        adds    r4, r0, #0
 4e8:   404c        eors    r4, r1
 4ea:   46a4        mov ip, r4
 4ec:   2301        movs    r3, #1
 4ee:   2200        movs    r2, #0
 4f0:   2900        cmp r1, #0
 4f2:   d500        bpl.n   4f6 <.divsi3_skip_div0_test+0x12>
 4f4:   4249        negs    r1, r1
 4f6:   2800        cmp r0, #0
 4f8:   d500        bpl.n   4fc <.divsi3_skip_div0_test+0x18>
 4fa:   4240        negs    r0, r0
 4fc:   4288        cmp r0, r1
 4fe:   d32c        bcc.n   55a <.divsi3_skip_div0_test+0x76>
 500:   2401        movs    r4, #1
 502:   0724        lsls    r4, r4, #28
 504:   42a1        cmp r1, r4
 506:   d204        bcs.n   512 <.divsi3_skip_div0_test+0x2e>
 508:   4281        cmp r1, r0
 50a:   d202        bcs.n   512 <.divsi3_skip_div0_test+0x2e>
 50c:   0109        lsls    r1, r1, #4
 50e:   011b        lsls    r3, r3, #4
 510:   e7f8        b.n 504 <.divsi3_skip_div0_test+0x20>
 512:   00e4        lsls    r4, r4, #3
 514:   42a1        cmp r1, r4
 516:   d204        bcs.n   522 <.divsi3_skip_div0_test+0x3e>
 518:   4281        cmp r1, r0
 51a:   d202        bcs.n   522 <.divsi3_skip_div0_test+0x3e>
 51c:   0049        lsls    r1, r1, #1
 51e:   005b        lsls    r3, r3, #1
 520:   e7f8        b.n 514 <.divsi3_skip_div0_test+0x30>
 522:   4288        cmp r0, r1
 524:   d301        bcc.n   52a <.divsi3_skip_div0_test+0x46>
 526:   1a40        subs    r0, r0, r1
 528:   431a        orrs    r2, r3
 52a:   084c        lsrs    r4, r1, #1
 52c:   42a0        cmp r0, r4
 52e:   d302        bcc.n   536 <.divsi3_skip_div0_test+0x52>
 530:   1b00        subs    r0, r0, r4
 532:   085c        lsrs    r4, r3, #1
 534:   4322        orrs    r2, r4
 536:   088c        lsrs    r4, r1, #2
 538:   42a0        cmp r0, r4
 53a:   d302        bcc.n   542 <.divsi3_skip_div0_test+0x5e>
 53c:   1b00        subs    r0, r0, r4
 53e:   089c        lsrs    r4, r3, #2
 540:   4322        orrs    r2, r4
 542:   08cc        lsrs    r4, r1, #3
 544:   42a0        cmp r0, r4
 546:   d302        bcc.n   54e <.divsi3_skip_div0_test+0x6a>
 548:   1b00        subs    r0, r0, r4
 54a:   08dc        lsrs    r4, r3, #3
 54c:   4322        orrs    r2, r4
 54e:   2800        cmp r0, #0
 550:   d003        beq.n   55a <.divsi3_skip_div0_test+0x76>
 552:   091b        lsrs    r3, r3, #4
 554:   d001        beq.n   55a <.divsi3_skip_div0_test+0x76>
 556:   0909        lsrs    r1, r1, #4
 558:   e7e3        b.n 522 <.divsi3_skip_div0_test+0x3e>
 55a:   1c10        adds    r0, r2, #0
 55c:   4664        mov r4, ip
 55e:   2c00        cmp r4, #0
 560:   d500        bpl.n   564 <.divsi3_skip_div0_test+0x80>
 562:   4240        negs    r0, r0
 564:   bc10        pop {r4}
 566:   4770        bx  lr
 568:   2800        cmp r0, #0
 56a:   d006        beq.n   57a <.divsi3_skip_div0_test+0x96>
 56c:   db03        blt.n   576 <.divsi3_skip_div0_test+0x92>
 56e:   2000        movs    r0, #0
 570:   43c0        mvns    r0, r0
 572:   0840        lsrs    r0, r0, #1
 574:   e001        b.n 57a <.divsi3_skip_div0_test+0x96>
 576:   2080        movs    r0, #128    ; 0x80
 578:   0600        lsls    r0, r0, #24
 57a:   b407        push    {r0, r1, r2}
 57c:   4802        ldr r0, [pc, #8]    ; (588 <.divsi3_skip_div0_test+0xa4>)
 57e:   a102        add r1, pc, #8  ; (adr r1, 588 <.divsi3_skip_div0_test+0xa4>)
 580:   1840        adds    r0, r0, r1
 582:   9002        str r0, [sp, #8]
 584:   bd03        pop {r0, r1, pc}
 586:   46c0        nop         ; (mov r8, r8)
 588:   00000019    .word   0x00000019

000005a4 <memcpy>:
 5a4:   b5f0        push    {r4, r5, r6, r7, lr}
 5a6:   2a0f        cmp r2, #15
 5a8:   d935        bls.n   616 <memcpy+0x72>
 5aa:   1c03        adds    r3, r0, #0
 5ac:   430b        orrs    r3, r1
 5ae:   079c        lsls    r4, r3, #30
 5b0:   d135        bne.n   61e <memcpy+0x7a>
 5b2:   1c16        adds    r6, r2, #0
 5b4:   3e10        subs    r6, #16
 5b6:   0936        lsrs    r6, r6, #4
 5b8:   0135        lsls    r5, r6, #4
 5ba:   1945        adds    r5, r0, r5
 5bc:   3510        adds    r5, #16
 5be:   1c0c        adds    r4, r1, #0
 5c0:   1c03        adds    r3, r0, #0
 5c2:   6827        ldr r7, [r4, #0]
 5c4:   601f        str r7, [r3, #0]
 5c6:   6867        ldr r7, [r4, #4]
 5c8:   605f        str r7, [r3, #4]
 5ca:   68a7        ldr r7, [r4, #8]
 5cc:   609f        str r7, [r3, #8]
 5ce:   68e7        ldr r7, [r4, #12]
 5d0:   3410        adds    r4, #16
 5d2:   60df        str r7, [r3, #12]
 5d4:   3310        adds    r3, #16
 5d6:   42ab        cmp r3, r5
 5d8:   d1f3        bne.n   5c2 <memcpy+0x1e>
 5da:   1c73        adds    r3, r6, #1
 5dc:   011b        lsls    r3, r3, #4
 5de:   18c5        adds    r5, r0, r3
 5e0:   18c9        adds    r1, r1, r3
 5e2:   230f        movs    r3, #15
 5e4:   4013        ands    r3, r2
 5e6:   2b03        cmp r3, #3
 5e8:   d91b        bls.n   622 <memcpy+0x7e>
 5ea:   1f1c        subs    r4, r3, #4
 5ec:   08a4        lsrs    r4, r4, #2
 5ee:   3401        adds    r4, #1
 5f0:   00a4        lsls    r4, r4, #2
 5f2:   2300        movs    r3, #0
 5f4:   58ce        ldr r6, [r1, r3]
 5f6:   50ee        str r6, [r5, r3]
 5f8:   3304        adds    r3, #4
 5fa:   42a3        cmp r3, r4
 5fc:   d1fa        bne.n   5f4 <memcpy+0x50>
 5fe:   18ed        adds    r5, r5, r3
 600:   18c9        adds    r1, r1, r3
 602:   2303        movs    r3, #3
 604:   401a        ands    r2, r3
 606:   d005        beq.n   614 <memcpy+0x70>
 608:   2300        movs    r3, #0
 60a:   5ccc        ldrb    r4, [r1, r3]
 60c:   54ec        strb    r4, [r5, r3]
 60e:   3301        adds    r3, #1
 610:   4293        cmp r3, r2
 612:   d1fa        bne.n   60a <memcpy+0x66>
 614:   bdf0        pop {r4, r5, r6, r7, pc}
 616:   1c05        adds    r5, r0, #0
 618:   2a00        cmp r2, #0
 61a:   d1f5        bne.n   608 <memcpy+0x64>
 61c:   e7fa        b.n 614 <memcpy+0x70>
 61e:   1c05        adds    r5, r0, #0
 620:   e7f2        b.n 608 <memcpy+0x64>
 622:   1c1a        adds    r2, r3, #0
 624:   e7f8        b.n 618 <memcpy+0x74>
 626:   46c0        nop         ; (mov r8, r8)

我猜我自己可以编写体积更小、但运行效率更低的代码。

这可能吗?

我在哪里可以找到需要编辑的源代码?我猜应该在gcc-arm-none-eabi/lib/gcc/arm-none-eabi/4.8.3/下寻找libc的源代码。我想我已经找到了编译后的符号,但找不到对应的源代码。

~/gcc-arm-none-eabi$ grep -R divsi3_skip_div0_test *
Binary file lib/gcc/arm-none-eabi/4.8.3/libgcc.a matches
Binary file lib/gcc/arm-none-eabi/4.8.3/thumb/libgcc.a matches
Binary file lib/gcc/arm-none-eabi/4.8.3/armv6-m/libgcc.a matches
Binary file lib/gcc/arm-none-eabi/4.8.3/fpu/libgcc.a matches
Binary file lib/gcc/arm-none-eabi/4.8.3/armv7-ar/thumb/libgcc.a matches
Binary file lib/gcc/arm-none-eabi/4.8.3/armv7-ar/thumb/softfp/libgcc.a matches
Binary file lib/gcc/arm-none-eabi/4.8.3/armv7-ar/thumb/fpu/libgcc.a matches

或者,有没有办法告诉gcc在复制结构体时不使用memcpy?(这些结构体只有10个字节,所以三条Thumb指令应该就能完成这项工作。)我尝试添加-mno-memcpy和-Wa,mno-memcpy,但两者都没有被识别。

更新

我已经解决了这个问题中的memcpy部分:自己提供一个不完整但够用的memcpy函数,就能阻止库中的那个版本被加入。

/*
 * Minimal, size-optimised stand-in for the C library memcpy().
 *
 * Copies `size` bytes from `src` to `dst` one byte at a time, lowest address
 * first. Both pointers are `restrict`, so the two regions must not overlap.
 *
 * NOTE: ISO C memcpy() returns `dst`; this version returns the number of
 * bytes copied instead. The return type is deliberately left unchanged so
 * existing callers keep working, but code that relies on the standard
 * return value must not be linked against this replacement.
 *
 * dst   destination buffer, at least `size` bytes long
 * src   source buffer, at least `size` bytes long (only read, never written)
 * size  number of bytes to copy; 0 copies nothing
 *
 * Returns the number of bytes copied, which is always `size`.
 */
size_t memcpy(uint8_t *restrict dst, const uint8_t *restrict src, size_t size) {
    /* Same unsigned type as `size`: avoids the signed/unsigned comparison and
     * the signed overflow an `int` index would hit for size > INT_MAX. */
    size_t i;
    for (i = 0; i < size; i++) {
        dst[i] = src[i];
    }
    return i;
}

它体积更小,但效率更低,并且不处理dst < src + size这种内存重叠的情况。

000003ec <memcpy>:
 3ec:   b510        push    {r4, lr}
 3ee:   2300        movs    r3, #0
 3f0:   4293        cmp r3, r2
 3f2:   d003        beq.n   3fc <memcpy+0x10>
 3f4:   5ccc        ldrb    r4, [r1, r3]
 3f6:   54c4        strb    r4, [r0, r3]
 3f8:   3301        adds    r3, #1
 3fa:   e7f9        b.n 3f0 <memcpy+0x4>
 3fc:   1c18        adds    r0, r3, #0
 3fe:   bd10        pop {r4, pc}

澄清一下:我现在只想问,怎样才能用效率较低但体积更小的代码来替换.divsi3_skip_div0_test。

我不清楚这段代码的源代码在哪里,也不知道如何修改它。替换它似乎比替换memcpy更复杂,因为它的名字以.开头,看起来不像一个C函数。

0 个答案:

没有答案