我用SIMD编写了这段矩阵乘法代码,它可以在Visual Studio中编译,但现在我需要在Ubuntu中使用gcc/g++来编译它。
我应该使用哪些命令来编译它?我是否需要对代码本身进行任何更改?
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <xmmintrin.h>

#include <ctime>
#include <iostream>

#ifdef _WIN32
#include <conio.h>  // getch(); this console header only exists on Windows toolchains
#endif
using namespace std;
#define MAX_NUM 1000
// NOTE: MAX_DIM must stay a multiple of 4. Both inner product loops step k by
// 4, and the aligned SSE loads below need every matrix row to start on a
// 16-byte boundary (MAX_DIM * sizeof(float) must be a multiple of 16).
#define MAX_DIM 252

// 16-byte alignment qualifier for the matrices. MSVC spells it
// __declspec(align(16)); GCC and Clang spell it __attribute__((aligned(16))).
#if defined(_MSC_VER)
#define MATMUL_ALIGN16 __declspec(align(16))
#else
#define MATMUL_ALIGN16 __attribute__((aligned(16)))
#endif

/*
 * Benchmark of a MAX_DIM x MAX_DIM float matrix product c = a * b.
 *
 * Fills a and b with pseudo-random integers in [0, MAX_NUM), then times two
 * implementations and prints the elapsed CPU time of each:
 *   1. SSE: transposes b into d (inside the timed region) and forms every
 *      dot product four floats at a time.
 *   2. Scalar: the plain triple loop, unrolled by four.
 * Both passes write their result into c. Returns 0.
 */
int main()
{
    const int l = MAX_DIM, m = MAX_DIM, n = MAX_DIM;

    // Static storage: the four matrices together take roughly 1 MB, which is
    // about the whole default thread stack on Windows.
    static MATMUL_ALIGN16 float a[MAX_DIM][MAX_DIM];
    static MATMUL_ALIGN16 float b[MAX_DIM][MAX_DIM];
    static MATMUL_ALIGN16 float c[MAX_DIM][MAX_DIM];
    static MATMUL_ALIGN16 float d[MAX_DIM][MAX_DIM];

    srand((unsigned)time(0));
    for (int i = 0; i < l; ++i) {
        for (int j = 0; j < m; ++j) {
            a[i][j] = (float)(rand() % MAX_NUM);
        }
    }
    for (int i = 0; i < m; ++i) {
        for (int j = 0; j < n; ++j) {
            b[i][j] = (float)(rand() % MAX_NUM);
        }
    }

    // ---- SSE implementation (the transposition is part of the timing) ----
    clock_t Time1 = clock();

    // d = transpose(b), so that a column of b becomes a contiguous row of d.
    for (int i = 0; i < m; ++i) {
        for (int j = 0; j < n; ++j) {
            d[i][j] = b[j][i];
        }
    }

    // Scratch buffer for reading the four lanes of a product back as floats.
    // Storing through _mm_store_ps avoids reading a __m128 object through a
    // float pointer.
    MATMUL_ALIGN16 float lanes[4];

    for (int i = 0; i < l; ++i) {
        for (int j = 0; j < n; ++j) {
            c[i][j] = 0;
            for (int k = 0; k < m; k += 4) {
                const __m128 row = _mm_load_ps(&a[i][k]);
                const __m128 col = _mm_load_ps(&d[j][k]);
                _mm_store_ps(lanes, _mm_mul_ps(row, col));
                // Same left-to-right summation order as the scalar lanes.
                c[i][j] += lanes[0] + lanes[1] + lanes[2] + lanes[3];
            }
        }
    }

    clock_t Time2 = clock();
    double TotalTime = ((double)Time2 - (double)Time1) / CLOCKS_PER_SEC;
    cout << "Time taken by SIMD implmentation is " << TotalTime << "s\n";

    // ---- Scalar implementation, manually unrolled by four ----
    Time1 = clock();
    for (int i = 0; i < l; ++i) {
        for (int j = 0; j < n; ++j) {
            c[i][j] = 0;
            for (int k = 0; k < m; k += 4) {
                c[i][j] += a[i][k] * b[k][j];
                c[i][j] += a[i][k + 1] * b[k + 1][j];
                c[i][j] += a[i][k + 2] * b[k + 2][j];
                c[i][j] += a[i][k + 3] * b[k + 3][j];
            }
        }
    }
    Time2 = clock();
    TotalTime = ((double)Time2 - (double)Time1) / CLOCKS_PER_SEC;
    cout << "Time taken by normal implmentation is " << TotalTime << "s\n";

#ifdef _WIN32
    // Wait for a key press before exiting. getch() comes from <conio.h>,
    // which is Windows-only, so the pause is skipped on other platforms.
    getch();
#endif
    return 0;
}
答案 0 :(得分:8)
您需要启用SSE,例如
$ g++ -msse3 -O3 -Wall -lrt foo.cpp -o foo
您还需要更改:
__declspec(align(16))
这是Windows特有的写法,应改为更具可移植性的写法:
__attribute__ ((aligned(16)))
此外,<conio.h> 和 getch() 同样是Windows特有的,在Linux上需要去掉或替换(例如改用 getchar())。