好吧,我最后一次编写一个或多或少有用的汇编程序代码可以追溯到20世纪80年代中期,试图制作一个Apple] [+ beep betterer ...
我想要实现的是使用x86处理器的SSE扩展。作为切入点,我使用了德语维基百科提供的代码。 (尝试在英文版中搜索'''''''''''''''''作为语言,然后滚动一点点。)
以下是我编写的代码:
#include <stdio.h>
#include <stdlib.h>
void mul_asm(float* out, float* in, unsigned int length)
{
unsigned int count, rest;
rest = (length * 4) % 16;
printf("rest: %i\n", rest);
count = (length * 4) - rest;
printf("count: %i\n", count);
if (count > 0)
{
__asm __volatile__(
".intel_syntax noprefix\n\t"
"loop:\n\t"
"movups xmm0,[ebx+ecx]\n\t"
"movups xmm1,[eax+ecx]\n\t"
"mulps xmm0,xmm1\n\t"
"movups [eax+ecx],xmm0\n\t"
"sub ecx,16\n\t"
"jnz loop\n\t"
".att_syntax prefix\n\t"
: : "a" (out), "b" (in), "c" (count), "d" (rest): "xmm0", "xmm1");
printf("\tcount: %i\n", count);
}
return;
}
int main()
{
unsigned int length = 8;
float* out = malloc(sizeof(float) * length);
if (out == 0) return 1;
float* in = malloc(sizeof(float) * length);
if (in == 0) return 1;
printf("So far so good...\n");
out[0] = 1.0f; out[1] = 2.0f; out[2] = 3.0f; out[3] = 4.0f;
out[4] = 5.0f; out[5] = 6.0f; out[6] = 7.0f; out[7] = 8.0f;
in[0] = 1.0f; in[1] = 2.0f; in[2] = 3.0f; in[3] = 4.0f;
in[4] = 5.0f; in[5] = 6.0f; in[6] = 7.0f; in[7] = 8.0f;
printf("outs and ins set.\n");
mul_asm(out, in, length);
printf("Multiplied.\n");
for (unsigned int i = 0; i < length; i++)
{
printf("out[%f] = %f\n", (double)i, out[i]);
}
getc(stdin);
free(out);
free(in);
return 0;
}
使用
编译代码时,编译器很高兴gcc -O3 -msse2 -Wall -Wextra test.c
( - O3启用SSE)
但是在执行代码时,它会给出#34; Speicherzugriffsfehler&#34;。像这样:
So far, so good...
outs and ins set.
rest: 0
count: 32
Speicherzugriffsfehler
(这是内存访问错误。)
同样,我是x86汇编程序编码的完全新手。我错过了什么?