如果我用以下任一命令编译下面的代码:
g++ -std=c++11 -o omp_for_loop omp_for_loop.cpp -fopenmp -lpthread
或
g++ -std=c++11 -o omp_for_loop omp_for_loop.cpp -fopenmp -lpthread -L/usr/lib64/gcc/x86_64-suse-linux/4.8 -lgomp
生成的程序在运行时只使用一个 CPU 核心。我哪里做错了?
#include <string>
#include <iostream>
#include <sstream>
#include <fstream>
#include <vector>
#include <array>
#include <time.h>
#include <stdio.h>
#include <math.h>
#include <omp.h>
/**
 * Reads N doubles from "time_spec_values.txt", fills an N x N row-major table
 * with 2*pi*(values[k] - values[l]) using OpenMP worksharing loops, and prints
 * every table entry on its own line.
 *
 * @return 0 on success; 1 if the input file cannot be opened or does not
 *         contain N readable values (a diagnostic is written to std::cerr).
 */
int main(){
    omp_set_num_threads(4);

    // Thread 0 reports the team size once; every thread announces itself.
    int nthreads;
    #pragma omp parallel shared(nthreads)
    {
        // Declared inside the region, so each thread gets its own copy.
        const int tid = omp_get_thread_num();
        if (tid == 0)
        {
            nthreads = omp_get_num_threads();
            printf("Number of threads = %d\n", nthreads);
        }
        printf("Thread %d starting...\n",tid);
    }

    const int N=4096;
    std::vector<double> values (N, 0.0);

    // Fail loudly instead of silently computing on zeros when the input is
    // missing or truncated.
    std::ifstream F("time_spec_values.txt");
    if (!F)
    {
        std::cerr << "error: cannot open time_spec_values.txt" << std::endl;
        return 1;
    }
    for (int q=0; q<N; q++)
    {
        if (!(F >> values[q]))
        {
            std::cerr << "error: time_spec_values.txt holds only " << q
                      << " readable values, expected " << N << std::endl;
            return 1;
        }
    }

    const double PI2=2*M_PI;
    const int square=N*N; // 4096*4096 = 16777216, fits in int
    std::vector<int> index (square, 0);
    std::vector<double> outvalues (square, 0.0);
    std::vector<double> RESULT (square, 0.0);

    // Loop counters declared in the for-init are private to each thread, so no
    // private(...) clauses are needed. Every iteration costs the same, so a
    // static schedule is used rather than dynamic chunk dispatch.
    #pragma omp parallel for schedule(static)
    for (int i=0; i<square; i++)
    {
        index[i]=i;
    }

    // Each k writes only its own row [k*N, k*N + N), so rows are independent.
    #pragma omp parallel for schedule(static)
    for (int k=0; k<N; k++)
    {
        const double rowValue=values[k];
        const int rowStart=k*N;
        for (int l=0; l<N; l++)
        {
            outvalues[rowStart+l]=PI2*(rowValue-values[l]);
        }
    }

    #pragma omp parallel for schedule(static)
    for (int m=0; m<square; m++)
    {
        RESULT[m]=outvalues[m];
    }

    std::cout << "index size = " << index.size()<< "outvalues size = " << outvalues.size() << std::endl;

    // Printing is serial. '\n' avoids the stream flush that std::endl would
    // perform on each of the N*N lines.
    for(int n=0; n<square; n++)
    {
        std::cout << RESULT[n] << '\n';
    }
    return 0;
}