我需要相乘,然后加上矩阵AxB + C。有没有办法在c ++中执行此操作,以便应用矢量化并且矩阵相乘和相加更快?如果有这样做的参考或代码,那就太好了。另外,我是否需要修改某些内容才能启用矢量化。
提前谢谢
编辑:
//g++ -I/path/to/eigen -O3 -DEIGEN_NO_DEBUG -fopenmp new.cpp
// ./a.out
#pragma GCC optimize("O3","unroll-loops","omit-frame-pointer","inline") //Optimization flags
#pragma GCC option("arch=native","tune=native","no-zero-upper") //Enable AVX
#pragma GCC target("avx") //Enable AVX
#include <x86intrin.h> //AVX/SSE Extensions
#include <bits/stdc++.h> //All main STD libraries
#include <iostream>
#include <chrono>
#include <Eigen/Dense>
#include <unsupported/Eigen/FFT>
using namespace Eigen;
int main()
{
// Matrix2d mat;
// mat << 1, 2,
// 3, 4;
// mat= MatrixXd::Random(3000,3000);
// Vector2d u(-1,1), v(2,0);
// Matrix2d m1;
// auto start = std::chrono::steady_clock::now();
//
// m1 << mat*mat;
// auto end = std::chrono::steady_clock::now();
// std::cout << "Elapsed time in nanoseconds : "
// << std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()
// << " ns" ;
// std::cout << "Here is mat*mat:\n" << mat*mat << std::endl;
// std::cout << "Here is mat*u:\n" << mat*u << std::endl;
// std::cout << "Here is u^T*mat:\n" << u.transpose()*mat << std::endl;
// std::cout << "Here is u^T*v:\n" << u.transpose()*v << std::endl;
// std::cout << "Here is u*v^T:\n" << u*v.transpose() << std::endl;
// std::cout << "Let's multiply mat by itself" << std::endl;
// mat = mat*mat;
// std::cout << "Now mat is mat:\n" << mat << std::endl;
// mat << 1, 2,
// 3, 4;
MatrixXf m1 = MatrixXf::Random(32,32);
MatrixXf m2 = MatrixXf::Random(32,32);
MatrixXf newM;
auto start = std::chrono::steady_clock::now();
newM=m1*m2;
auto end = std::chrono::steady_clock::now();
std::cout << "Elapsed time in nanoseconds : "
<< std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()
<< " ns" ;
MatrixXf a = MatrixXf::Random(1, 32);
MatrixXf b = MatrixXf::Random(32, 32);
MatrixXf c = MatrixXf::Random(32, 1);
time_t start1 = clock();
start = std::chrono::steady_clock::now();
MatrixXf d = a * b * c;
end = std::chrono::steady_clock::now();
std::cout << "Elapsed time in nanoseconds : "
<< std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()
<< " ns" ;
std::cout << (double)(clock() - start1) / CLOCKS_PER_SEC * 1000 << "ms" << std::endl;
const int ROWS = 32;
const int COLS = 32;
int random_matrix[ROWS][COLS];
int i;
int j;
/* initialize random seed: */
srand(static_cast<unsigned>(time(0)));
/* generate number: */
for (i=0;i<ROWS;i++)
{
for (j=0;j<COLS;j++)
random_matrix[i][j]= rand() % 100;
// std::cout << random_matrix[i][j] << std::endl; //display table
}
int mult[10][10], r1, c1, r2, c2, k;
r1=32*32;
c2=32*32;
c1=32*32;
start = std::chrono::steady_clock::now();
start1 = clock();
for(i = 0; i < r1; ++i)
for(j = 0; j < c2; ++j)
{
mult[i][j]=0;
}
for(i = 0; i < r1; ++i)
for(j = 0; j < c2; ++j)
for(k = 0; k < c1; ++k)
{
// std::cout << "Elapsed time in nanoseconds : "
// << std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()
// << " ns" ;
mult[i][j] += random_matrix[i][k] * random_matrix[k][j]* random_matrix[k][j];
}
end = std::chrono::steady_clock::now();
double end1=clock() - start1;
std::cout << "Elapsed time in nanoseconds : "
<< std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()
<< " ns" ;
std::cout << (double)(end1) / CLOCKS_PER_SEC * 1000 << "ms" << std::endl;
return 0;