NEON Cortex-A8 的汇编示例

时间:2015-02-21 11:25:04

标签: c++ c linux eclipse gcc

我正在尝试编译一段用于 Cortex-A8 的 NEON 汇编示例代码,以便在 BeagleBone Black(BBB)开发板上运行生成的二进制文件。我使用 Eclipse,以及下面列出的 GCC 编译器和汇编器:

  

GCC:arm-linux-gnueabi-gcc

     

汇编程序:arm-linux-gnueabi-as

编译该示例时出现以下错误;我尝试过的每个示例都会出现类似的错误:

Description Path    Resource    Location    Type
SP not allowed in register list -- `ldmia r12,{r4-r11,r13,lr}'      EXAMPLE_NEON    line 61, external location: /tmp/ccTXrczs.s C/C++ Problem

我使用的代码

/************************
 * neon.c               *
 ************************/

#include <stdio.h>


/*
 * Input and output vectors for neontest().  The NEON loads and stores in
 * neontest() use a ":128" address qualifier, so every array needs 16-byte
 * alignment.  The attribute binds only to the declaration it is attached
 * to, hence it is repeated on each array.
 */
__attribute__((aligned (16)))
unsigned short int data1[8];
__attribute__((aligned (16)))
unsigned short int data2[8];
__attribute__((aligned (16)))
unsigned short int out[8];

/* Scratch area in which neontest() stores r4-r11, r13 and lr. */
void* neontest_save_buffer[16];


void
neontest(unsigned short int *a, unsigned short int *b,
                unsigned short int* q)
{
  __asm__(
"   movw        r12, #:lower16:neontest_save_buffer\n\t"
"   movt        r12, #:upper16:neontest_save_buffer\n\t"
"   stmia       r12, {r4-r11, r13, lr}        @ save registers\n\t"
"   vld1.16     {q1}, [r0:128]\n\t"
"   vld1.16     {q2}, [r1:128]\n\t"
"   vadd.i16    q0, q1, q2\n\t"
"   vst1.32     {q0}, [r2:128]\n\t"
"   movw        r12, #:lower16:neontest_save_buffer\n\t"
"   movt        r12, #:upper16:neontest_save_buffer\n\t"
"   ldmia       r12, {r4-r11, r13, lr}        @ reload all registers and return\n\t"
"finish:\n\t"
    );

}

/*
 * Demo driver: fill data1 with 0, 10, ..., 70 and data2 with 5s, clear out,
 * run neontest() on them and print the eight results on one line.
 */
int
main(void)
{
    int idx = 0;

    while (idx < 8)
    {
        out[idx] = 0;
        data2[idx] = 5;
        data1[idx] = idx * 10;
        ++idx;
    }

    neontest(data1, data2, out);

    printf("output is: ");
    for (idx = 0; idx < 8; ++idx)
    {
        /* the last element ends the line instead of being followed by ", " */
        if (idx != 7)
        {
            printf("%d, ", out[idx]);
        }
        else
        {
            printf("%d\n", out[idx]);
        }
    }

    return 0;
}

2 个答案:

答案 0 :(得分:0)

看起来你正在以 Thumb-2(32 位 Thumb)模式编译,在该模式下 SP 不能出现在寄存器列表中(来自[1])。

如果您并没有设置新的堆栈,为什么需要保存堆栈指针? 只需尝试从 stm 和 ldm 指令的寄存器列表中删除 r13。

答案 1 :(得分:0)

编译器默认使用 Thumb 模式;在命令行中添加 "-marm" 即可以 ARM 模式编译代码:

arm-linux-gnueabihf-gcc -mcpu=cortex-a8 -mfpu=neon -marm neon.c

您还可以调整代码以让编译器执行寄存器保存/恢复,这样就可以为ARM和thumb2指令集编译代码:

#include <stdio.h>


/*
 * Input and output vectors for neontest().  The NEON loads and stores in
 * neontest() use a ":128" address qualifier, so every array needs 16-byte
 * alignment.  The attribute binds only to the declaration it is attached
 * to, hence it is repeated on each array.
 */
__attribute__((aligned (16)))
unsigned short int data1[8];
__attribute__((aligned (16)))
unsigned short int data2[8];
__attribute__((aligned (16)))
unsigned short int out[8];

void
neontest(unsigned short int *a, unsigned short int *b,
                unsigned short int* q)
{
    __asm volatile (
"   vld1.16     {q1}, [%[a]:128]\n\t"
"   vld1.16     {q2}, [%[b]:128]\n\t"
"   vadd.i16    q0, q1, q2\n\t"
"   vst1.32     {q0}, [%[q]:128]\n\t"

     : [q] "+r" (q)
     : [a] "r" (a), [b] "r" (b)
     : "q0", "q1", "q2"
    );
}

/*
 * Demo driver: fill data1 with 0, 10, ..., 70 and data2 with 5s, clear out,
 * run neontest() on them and print the eight results on one line.
 */
int
main(void)
{
    int idx = 0;

    while (idx < 8)
    {
        out[idx] = 0;
        data2[idx] = 5;
        data1[idx] = idx * 10;
        ++idx;
    }

    neontest(data1, data2, out);

    printf("output is: ");
    for (idx = 0; idx < 8; ++idx)
    {
        /* the last element ends the line instead of being followed by ", " */
        if (idx != 7)
        {
            printf("%d, ", out[idx]);
        }
        else
        {
            printf("%d\n", out[idx]);
        }
    }

    return 0;
}

arm-linux-gnueabihf-gcc -mcpu=cortex-a8 -mfpu=neon -marm neon2.c

arm-linux-gnueabihf-gcc -mcpu=cortex-a8 -mfpu=neon -mthumb neon2.c