下面是我在 VS 2013 中使用的代码。用 OpenMP 为 for 循环引入并行后,速度并没有提高。我想把执行时间缩短到 20 ms,这可能吗?
无论是否启用 OpenMP,当前的执行时间都约为 150 ms。
speedtest.pro
TEMPLATE = app
CONFIG += console c++11
CONFIG -= app_bundle
CONFIG -= qt
SOURCES += main.cpp
QMAKE_CXXFLAGS += -openmp
main.cpp
#include <iostream>
#include <chrono>
#include <ctime>
#include <omp.h>
#include <conio.h>
using namespace std;
// Flags every (a-point, b-point) pair whose squared Euclidean distance is
// strictly below 30.
//
// Parameters:
//   c     - row-major aSize x bSize flag matrix. c[i*bSize + j] is set to
//           true when point i of `a` and point j of `b` are close. Entries
//           for pairs that are not close are left untouched, so the caller
//           must clear `c` beforehand.
//   a     - aSize points stored as packed (x, y, z) int triples.
//   aSize - number of points in `a`.
//   b     - bSize points stored as packed (x, y, z) int triples.
//   bSize - number of points in `b`.
//
// The outer loop is annotated for OpenMP; each iteration writes only its own
// row of `c`, so iterations do not write to the same elements.
void fun(bool *c, int *a, int aSize, int *b, int bSize)
{
    const long long thresholdSq = 30;
    // Largest per-axis |delta| that can still satisfy d*d < 30 (5*5 = 25,
    // 6*6 = 36). Anything larger can be rejected without squaring, which
    // also keeps the squares far away from overflow.
    const long long maxAxisDelta = 5;

#pragma omp parallel for
    for (int i = 0; i < aSize; i++) {
        // Hoist the a-point and the output row: both are invariant in j.
        // 64-bit offsets avoid int overflow of i*3 and i*bSize.
        const int *pa = a + static_cast<long long>(i) * 3;
        const long long ax = pa[0];
        const long long ay = pa[1];
        const long long az = pa[2];
        bool *row = c + static_cast<long long>(i) * bSize;

        for (int j = 0; j < bSize; j++) {
            const int *pb = b + static_cast<long long>(j) * 3;

            // Differences are taken in 64 bits so that extreme int
            // coordinates cannot overflow.
            const long long dx = ax - pb[0];
            if (dx > maxAxisDelta || dx < -maxAxisDelta)
                continue;
            const long long dy = ay - pb[1];
            if (dy > maxAxisDelta || dy < -maxAxisDelta)
                continue;
            const long long dz = az - pb[2];
            if (dz > maxAxisDelta || dz < -maxAxisDelta)
                continue;

            if (dx * dx + dy * dy + dz * dz < thresholdSq)
                row[j] = true;
        }
    }
}
// Benchmark driver: builds two synthetic point sets, times one call to fun(),
// reports whether any pair was flagged as close, and prints the elapsed time.
int main()
{
    const int aSize = 16000;
    const int bSize = 6000;
    const int pairCount = aSize * bSize;  // 96,000,000, which fits in int

    int *a = new int[aSize * 3];
    int *b = new int[bSize * 3];
    // The trailing () value-initialises the matrix to false; fun() only ever
    // sets entries, it never clears them.
    bool *c = new bool[pairCount]();

    // Point k of each set is (k, k, k).
    for (int i = 0; i < aSize; ++i) {
        a[i * 3] = i;
        a[i * 3 + 1] = i;
        a[i * 3 + 2] = i;
    }
    for (int i = 0; i < bSize; ++i) {
        b[i * 3] = i;
        b[i * 3 + 1] = i;
        b[i * 3 + 2] = i;
    }

    // steady_clock is the monotonic clock meant for measuring intervals;
    // system_clock can be adjusted while the measurement is running.
    const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
    fun(c, a, aSize, b, bSize);
    const std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();

    // fun() writes true for pairs that are close, so a collision exists as
    // soon as one true entry is found.
    bool collision = false;
    for (int i = 0; i < pairCount; ++i) {
        if (c[i]) {
            collision = true;
            break;
        }
    }
    if (collision)
        std::cout << "Collision" << std::endl;
    else
        std::cout << "No collision" << std::endl;

    const std::chrono::duration<double> elapsed_seconds = end - start;
    // The wall-clock timestamp is only used for the human-readable line.
    const std::time_t end_time =
        std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
    // Flush so the report is visible before getch() blocks waiting for a key.
    std::cout << "finished computation at " << std::ctime(&end_time)
              << "elapsed time: " << 1000 * elapsed_seconds.count() << "ms\n"
              << std::flush;

    getch();  // keep the console window open until a key is pressed

    delete [] a;
    delete [] b;
    delete [] c;
    return 0;
}
答案 0 :(得分:1)
您需要在 for 循环之前加上 #pragma omp parallel for:
// The pragma applies to the for loop that immediately follows it.
#pragma omp parallel for
for(int i=0;i<N;++i)
{
//....
}
使用Qt,您需要在.pro文件中添加:
QMAKE_CXXFLAGS += -fopenmp
QMAKE_LFLAGS += -fopenmp
答案 1 :(得分:0)
不要用 clock() 测量时间!它测量的是时钟滴答数。如果程序使用了 CPU 的多个内核,结果可能是错误的。
请改用 std::chrono 之类的工具。