iOS Neon汇编程序示例问题

时间:2016-06-13 10:09:24

标签: ios objective-c assembly arm neon

我正在 iOS 上尝试 http://api.madewithmarmalade.com/ExampleArmASM.html 中的示例。如果把循环注释掉,程序可以正常运行,res 打印为 28;但如果不注释掉循环,程序会在打印 res 之前异常终止。

有人能提示一下原因以及该如何解决吗?

提前致谢。

我的代码如下:

#include <stdio.h> 
#include <stdlib.h>  
#define ARRAY_SIZE 512  

#if defined __arm__ && defined __ARM_NEON__

static int computeSumNeon(const int a[])
{
    // Computes the sum of all elements in the input array using NEON.
    //
    // a: pointer to at least 512 readable ints (the value of ARRAY_SIZE in
    //    this file). The element count must be a non-zero multiple of 8
    //    because each loop iteration consumes 8 elements and the loop exits
    //    only when the counter reaches exactly zero.
    // Returns the low 32 bits of the sum.
    int res = 0;
    int remaining = 512; // elements left to add; keep in sync with ARRAY_SIZE

    asm(".align 4          \n\t"  // kept from the original, which says it avoids a warning
        "vmov.i32 q8, #0          \n\t"  // clear the accumulator register
        // Numeric local label: a named label such as ".loop1" is emitted as a
        // real symbol and is duplicated if this asm is ever instantiated twice.
        "1:                       \n\t"
        "vld1.32 {d0, d1, d2, d3}, [%[input]]! \n\t" // load 8 elements into q0, q1 and advance the pointer
        "pld [%[input]]           \n\t"  // preload the next set of elements
        "vadd.i32 q8, q0, q8      \n\t"  // q8 += q0
        "vadd.i32 q8, q1, q8      \n\t"  // q8 += q1
        "subs %[count], %[count], #8 \n\t" // remaining -= 8 (sets the flags)
        "bne 1b                   \n\t"  // loop until remaining == 0
        "vpadd.i32 d0, d16, d17   \n\t"  // d0[0] = d16[0] + d16[1], d0[1] = d17[0] + d17[1]
        "vpaddl.u32 d0, d0        \n\t"  // d0[0] = d0[0] + d0[1]
        "vmov.32 %[result], d0[0] \n\t"
        : [result] "=r" (res), [input] "+r" (a), [count] "+r" (remaining)
        :
        // "cc": subs modifies the condition flags.
        // "memory": the asm reads the array through the pointer, so earlier
        // stores to it must not be reordered past or dropped before the asm.
        : "q0", "q1", "q8", "cc", "memory");
    return res;
}


#else

static int computeSumNeon(const int a[])
{
    // Scalar fallback used when the NEON branch is not compiled in.
    //
    // a: pointer to at least ARRAY_SIZE readable ints.
    // Returns the sum of the first ARRAY_SIZE elements of a.
    int i, res = 0;
    for (i = 0; i < ARRAY_SIZE; i++)
        res += a[i];
    // The sum must be returned explicitly: reaching the end of a non-void
    // function and then using its value in the caller is undefined behaviour.
    return res;
}

#endif


...

@implementation AppDelegate


- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
    // Launch hook: runs a small self-check of computeSumNeon, then wires up
    // the split view controller.

    // Test data lives in a stack array, so there is nothing to free. (An
    // earlier variant, left commented out in the original, allocated the
    // buffer with posix_memalign using 64-byte alignment.)
    int inp[ARRAY_SIZE];

    // Fill the array with consecutive integers and echo each value.
    for (int idx = 0; idx < ARRAY_SIZE; ++idx) {
        inp[idx] = idx;
    }
    for (int idx = 0; idx < ARRAY_SIZE; ++idx) {
        printf("%i,", inp[idx]);
    }

    // Reference value for the first eight elements.
    const int firstEightSum = 0+1+2+3+4+5+6+7;
    printf("\n\n sum 0-7:%i\n", firstEightSum);

    const int neonSum = computeSumNeon(inp);
    printf("res NEO :%i\n", neonSum);

    // Show the display-mode button on the detail side's top view controller,
    // then register this object as the split view delegate.
    UISplitViewController *splitVC = (UISplitViewController *)self.window.rootViewController;
    UINavigationController *detailNav = splitVC.viewControllers.lastObject;
    UINavigationItem *topItem = detailNav.topViewController.navigationItem;
    topItem.leftBarButtonItem = splitVC.displayModeButtonItem;
    splitVC.delegate = self;

    return YES;
}

- (void)applicationWillResignActive:(UIApplication *)application {
...

====生成汇编代码

    @ Compiler-generated Thumb code for computeSumNeon, from a build in which
    @ the loop in the inline asm was enabled. The section between the
    @ "InlineAsm Start" and "InlineAsm End" markers is the asm template with
    @ its operands substituted: the input pointer is in r0, the result in r1.
    .align  1
    .code   16                      @ @computeSumNeon
    .thumb_func _computeSumNeon
_computeSumNeon:
Lfunc_begin3:
    .loc    18 133 0 is_stmt 1      @ ...
    .cfi_startproc
@ BB#0:
    @ Reserve 8 bytes of stack: [sp, #4] receives the incoming r0 and
    @ [sp] receives the zero placed in r1.
    sub sp, #8
    movs    r1, #0
    str r0, [sp, #4]
    .loc    18 135 9 prologue_end   @ ...
Ltmp18:
    str r1, [sp]
    .loc    18 136 5                @ ...
    @ Reload the value saved from r0; it is the base address used by vld1 below.
    ldr r0, [sp, #4]
    @ InlineAsm Start
    .align  4
    vmov.i32    q8, #0x0
    @ The counter here is 504, not the 512 shown in the C listing; this build
    @ also has no pld instruction, so it came from a slightly different source.
    movw    r3, #504
    @ NOTE(review): the compiler's own labels in this listing start with "L"
    @ (Ltmp18, Lfunc_begin3) while this one does not; presumably it is emitted
    @ as a real symbol rather than an assembler-local label - confirm.
.loop1:
    @ Each pass loads 8 words, post-increments r0, adds both quad registers
    @ into q8, and subtracts 8 from r3; bne repeats until r3 reaches zero.
    vld1.32 {d0, d1, d2, d3}, [r0]!
    vadd.i32    q8, q0, q8
    vadd.i32    q8, q1, q8
    subs    r3, #8
    bne .loop1
    @ Reduce the accumulator (d16/d17 are the two halves of q8) and move the
    @ low word of d0 into r1.
    vpadd.i32   d0, d16, d17
    vpaddl.u32  d0, d0
    vmov.32 r1, d0[0]

    @ InlineAsm End
    @ Write both asm outputs back to their stack slots: the result in r1 and
    @ the advanced pointer in r0.
    str r1, [sp]
    str r0, [sp, #4]
    .loc    18 155 12               @ ...
    @ Load the stored result into r0, release the stack space and return.
    ldr r0, [sp]
    .loc    18 155 5 is_stmt 0      @ ...
    add sp, #8
    bx  lr
Ltmp19:
Lfunc_end3:
    .cfi_endproc

0 个答案:

没有答案