我的系统是 Mac OS X 10.8.5。此机器上的默认 gcc 为 4.2(i686-apple-darwin11-llvm-gcc-4.2 (GCC) 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2335.15.00))。我在 /usr/local 下安装了 gcc 4.9。
我有一段使用 AVX2 内建函数(intrinsics)的代码。下面附上代码以及编译时得到的错误消息。
#include <stdio.h>
#include <stdlib.h>
#include <immintrin.h>
#include <sys/time.h>
#define SIZE 4
#define TIMES 1
/*
 * For every element i of a SIZE x SIZE row-major matrix, builds two sets of
 * four gather indices (a row of `a` and a column of `b`), gathers the four
 * `a` elements with an AVX2 gather, and prints both index sets.
 *
 * a: matrix read by the gather at element indices m .. m+3.
 * b: not read here; only its indices are computed and printed.
 * c: not accessed.
 *
 * The gathered vector is not used yet; the function only exercises the AVX2
 * gather and the index computation.
 */
void mmul(const float*a, const float* b, float*c){
    int a_vindex1[4] = {0,0,0,0};
    int b_vindex1[4] = {0,0,0,0};
    int m,i,j;
    __m128i a_index, b_index;
    __m128 a1;

    (void)b;
    (void)c;

    for (i=0;i< SIZE*SIZE; i+= 1){
        /* First index of the current row; the 4 matches the 4 gather lanes
         * (NOTE(review): presumably meant to be SIZE - confirm). */
        m=(i/SIZE)*4;
        for (j=0;j<4;j++){
            b_vindex1[j] = i%SIZE+SIZE*j;   /* walk down column i%SIZE */
            a_vindex1[j] = m+j;             /* walk along the row */
        }
        /* Plain int arrays are not guaranteed to be 16-byte aligned, so use
         * unaligned loads rather than dereferencing a __m128i pointer. */
        a_index = _mm_loadu_si128((const __m128i *)a_vindex1);
        b_index = _mm_loadu_si128((const __m128i *)b_vindex1);
        /* The indices count floats, so each one is scaled by sizeof(float) = 4
         * bytes; a scale of 1 would read at unaligned byte offsets. */
        a1 = _mm_i32gather_ps(a, a_index, 4);
        (void)a1;
        (void)b_index;
        printf("\nBINDEX %d,%d,%d,%d", b_vindex1[0],b_vindex1[1], b_vindex1[2], b_vindex1[3]);
        printf("\nAINDEX %d,%d,%d,%d", a_vindex1[0],a_vindex1[1], a_vindex1[2], a_vindex1[3]);
    }
}
/*
 * Allocates three zero-initialized SIZE x SIZE float matrices, fills a and b,
 * calls mmul TIMES times while accumulating the elapsed wall-clock time in
 * seconds, then frees the matrices.
 *
 * Returns 0 on success, EXIT_FAILURE if any allocation fails.
 */
int main(){
    float *a, *b, *c;
    int i,j;
    double timetotal = 0.0;
    struct timeval start,stop;

    a = calloc(SIZE*SIZE, sizeof *a);
    b = calloc(SIZE*SIZE, sizeof *b);
    c = calloc(SIZE*SIZE, sizeof *c);
    if (a == NULL || b == NULL || c == NULL){
        fprintf(stderr, "memory allocation failed\n");
        /* free(NULL) is a no-op, so releasing all three is safe. */
        free(a);
        free(b);
        free(c);
        return EXIT_FAILURE;
    }

    for (i=0;i<SIZE*SIZE;i++){
        a[i] = i;
        b[i] = 0.5*i;
    }

    for (j=0;j<TIMES;j++){
        gettimeofday(&start,NULL);
        mmul(a,b,c);
        gettimeofday(&stop,NULL);
        /* Do the arithmetic in double so the seconds-to-microseconds scaling
         * cannot overflow an integer type. */
        timetotal += (double)(stop.tv_sec-start.tv_sec)
                   + (double)(stop.tv_usec-start.tv_usec)/1000000.0;
    }
    /* Timing report left disabled, as in the original: */
    //printf("\n time average = %.8lf",timetotal/TIMES);

    free(a);
    free(b);
    free(c);
    return 0;
}
现在,如果我使用 gcc 4.9 编译此代码:
gcc-4.9 -O3 -march=core-avx2 a7.c
我会收到以下错误消息:
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:141:no such instruction: `vmovd %r8d, %xmm7'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:143:no such instruction: `vmovapd LC15(%rip), %ymm3'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:145:no such instruction: `vbroadcastss %xmm7, %ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:147:no such instruction: `vpaddd LC13(%rip), %ymm0,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:148:no such instruction: `vpaddd LC14(%rip), %ymm0,%ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:149:no such instruction: `vcvtdq2ps %ymm1, %ymm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:150:no such instruction: `vmovups %ymm2, (%rcx)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:151:no such instruction: `vcvtdq2pd %xmm1, %ymm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:152:no such instruction: `vmulpd %ymm3, %ymm2,%ymm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:153:no such instruction: `vextracti128 $0x1, %ymm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:154:no such instruction: `vcvtpd2psy %ymm2, %xmm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:155:no such instruction: `vcvtdq2pd %xmm1, %ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:156:no such instruction: `vmulpd %ymm3, %ymm1,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:157:no such instruction: `vcvtpd2psy %ymm1, %xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:158:no such instruction: `vinsertf128 $0x1, %xmm1,%ymm2,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:159:no such instruction: `vmovups %ymm1, (%rax)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:162:no such instruction: `vcvtdq2ps %ymm0, %ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:163:no such instruction: `vmovups %ymm1, 32(%rcx)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:164:no such instruction: `vcvtdq2pd %xmm0, %ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:165:no such instruction: `vextracti128 $0x1, %ymm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:166:no such instruction: `vmulpd %ymm3, %ymm1,%ymm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:167:no such instruction: `vcvtdq2pd %xmm0, %ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:168:no such instruction: `vcvtpd2psy %ymm1, %xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:169:no such instruction: `vmulpd %ymm3, %ymm0,%ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:170:no such instruction: `vcvtpd2psy %ymm0, %xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:171:no such instruction: `vinsertf128 $0x1, %xmm0,%ymm1,%ymm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:172:no such instruction: `vmovups %ymm0, 32(%rax)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:178:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:179:no such instruction: `vcvtsi2ss %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:181:no such instruction: `vxorps %xmm5, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:182:no such instruction: `vmovsd LC16(%rip), %xmm2'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:183:no such instruction: `vmovss %xmm0, (%rbx,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:184:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:185:no such instruction: `vcvtsi2sd %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:186:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:187:no such instruction: `vcvtsd2ss %xmm0, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:188:no such instruction: `vmovss %xmm5, (%r12,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:192:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:193:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:195:no such instruction: `vxorps %xmm6, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:196:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:197:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:198:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:200:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:201:no such instruction: `vcvtsd2ss %xmm0, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:202:no such instruction: `vmovss %xmm6, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:205:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:206:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:208:no such instruction: `vxorps %xmm7, %xmm7,%xmm7'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:209:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:210:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:211:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:213:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:214:no such instruction: `vcvtsd2ss %xmm0, %xmm7,%xmm7'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:215:no such instruction: `vmovss %xmm7, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:218:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:219:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:221:no such instruction: `vxorps %xmm4, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:222:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:223:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:224:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:226:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:227:no such instruction: `vcvtsd2ss %xmm0, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:228:no such instruction: `vmovss %xmm4, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:231:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:232:no such instruction: `vcvtsi2ss %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:234:no such instruction: `vxorps %xmm5, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:236:no such instruction: `vmovss %xmm0, (%rbx,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:237:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:238:no such instruction: `vcvtsi2sd %eax, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:239:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:240:no such instruction: `vcvtsd2ss %xmm0, %xmm5,%xmm5'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:241:no such instruction: `vmovss %xmm5, (%r12,%rdx,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:244:no such instruction: `vxorpd %xmm1, %xmm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:245:no such instruction: `vcvtsi2sd %edi, %xmm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:246:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:248:no such instruction: `vcvtsi2ss %edi, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:249:no such instruction: `vxorps %xmm6, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:251:no such instruction: `vmulsd %xmm2, %xmm1,%xmm1'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:252:no such instruction: `vmovss %xmm0, (%rbx,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:253:no such instruction: `vcvtsd2ss %xmm1, %xmm6,%xmm6'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:254:no such instruction: `vmovss %xmm6, (%r12,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:257:no such instruction: `vxorps %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:258:no such instruction: `vcvtsi2ss %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:260:no such instruction: `vxorps %xmm4, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:261:no such instruction: `vmovss %xmm0, (%rbx,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:262:no such instruction: `vxorpd %xmm0, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:263:no such instruction: `vcvtsi2sd %ecx, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:264:no such instruction: `vmulsd %xmm2, %xmm0,%xmm0'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:265:no such instruction: `vcvtsd2ss %xmm0, %xmm4,%xmm4'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:266:no such instruction: `vmovss %xmm4, (%r12,%rax,4)'
/var/folders/4n/28v8vnhn1s1fdmpdqjz3wtj00000gn/T//ccKFuPNS.s:270:no such instruction: `vzeroupper'
我猜在使用 gcc-4.9 时需要更新汇编器?或者这看起来是别的问题……任何提示都会有帮助。
答案 0 :(得分:5)
在编译命令中加入以下选项:
-Wa,-q
例如:gcc-4.9 -O3 -march=core-avx2 -Wa,-q a7.c
意义来自两个来源。首先是GCC手册:
-Wa,option
将 option 作为选项传递给汇编器。如果 option 中包含逗号,则会在逗号处将其拆分为多个选项。
然后是AS(Mac OS X Mach-O基于GNU的汇编程序)手册:
-q:使用 clang(1) 的集成汇编器代替基于 GNU 的系统汇编器。这是 x86 和 arm 架构的默认设置。
注意此选项在Darwin和GCC的更高版本上也是必需的(例如在带有GCC 6的macOS 10.12下测试)。