在GPU和CPU上运行矩阵乘法时,我得到了几乎相同的执行时间。
这是我的代码:
// Question snippet: times the same matrix-matrix product twice, switching the
// active ViennaCL OpenCL context in between. vcl_A, vcl_B, vcl_C, timer and
// exec_time are declared outside this excerpt.
//
// Presumably the arguments are (context id, platform index) -- confirm against
// the ViennaCL documentation. In the output shown below, context 0 reports a
// GeForce GTX 1080 and context 1 reports an Intel Xeon CPU.
viennacl::ocl::set_context_platform_index(1, 1);
viennacl::ocl::set_context_platform_index(0, 0);
// Make context 0 the current context for the first measurement.
viennacl::ocl::switch_context(0);
std::cout << "--- Computing matrix-matrix product using viennacl in GPU ---" << std::endl;
timer.start();
vcl_C = viennacl::linalg::prod(vcl_A, vcl_B);
// NOTE(review): the timer is read without a preceding
// viennacl::backend::finish(); the answer below adds that call so the
// measurement waits for the kernel to complete.
exec_time = timer.get();
std::cout << " - Execution time: " << exec_time << std::endl;
// Prints the name of the device of the current context, not a computed result.
std::cout << "result on GPU: "<<viennacl::ocl::current_device().name() << std::endl;
//same operation on CPU
std::cout << "coming here" << std::endl;
viennacl::ocl::switch_context(1);
std::cout << "--- Computing matrix-matrix product using viennacl in CPU ---" << std::endl;
timer.start();
// NOTE(review): vcl_A, vcl_B and vcl_C are the very same objects used for the
// first measurement. The answer below notes that a buffer is tied to one
// device, so this product probably still runs where those matrices were
// allocated, which would explain the near-identical timings -- confirm.
vcl_C = viennacl::linalg::prod(vcl_A, vcl_B);
// NOTE(review): same missing finish() as in the first measurement.
exec_time = timer.get();
std::cout << " - Execution time: " << exec_time << std::endl;
std::cout << "result on CPU: " << viennacl::ocl::current_device().name() << std::endl << std::endl;
这是我的结果:
--- Computing matrix-matrix product using viennacl in GPU ---
- Execution time: 24.4675
result on GPU: GeForce GTX 1080
coming here
--- Computing matrix-matrix product using viennacl in CPU ---
- Execution time: 24.4654
result on CPU: Intel(R) Xeon(R) CPU E3-1225 v5 @ 3.30GHz
请帮助我解决此问题。预先感谢!
答案 0 :(得分:0)
最后我在CPU和GPU上得到了正确的结果:
代码:
int main()
{
typedef float ScalarType;
viennacl::tools::timer timer;
double exec_timecpu;
double exec_timegpu;
viennacl::tools::uniform_random_numbers<ScalarType> randomNumber;
viennacl::ocl::set_context_platform_index(1, 1);
viennacl::ocl::set_context_platform_index(0, 0);
viennacl::ocl::switch_context(1);
viennacl::matrix<ScalarType> vcl_A(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
viennacl::matrix<ScalarType, viennacl::column_major> vcl_B(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
viennacl::matrix<ScalarType> vcl_C(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
for (unsigned int i = 0; i < vcl_A.size1(); ++i)
for (unsigned int j = 0; j < vcl_A.size2(); ++j)
vcl_A(i,j) = randomNumber();
for (unsigned int i = 0; i < vcl_B.size1(); ++i)
for (unsigned int j = 0; j < vcl_B.size2(); ++j)
vcl_B(i,j) = randomNumber();
std::cout << std::endl;
std::cout << "--- Computing matrix-matrix product using viennacl in CPU ---" << std::endl;
timer.start();
vcl_C = viennacl::linalg::prod(vcl_A, vcl_B);
viennacl::backend::finish();
exec_timecpu = timer.get();
std::cout << " - Execution time: " << exec_timecpu << std::endl;
std::cout << "result on CPU: " << viennacl::ocl::current_device().name() << std::endl << std::endl;
//same operation on GPU
viennacl::ocl::switch_context(0);
viennacl::matrix<ScalarType > vcl_GA(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
viennacl::matrix<ScalarType > vcl_GB(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
viennacl::matrix<ScalarType > vcl_GC(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE);
for (unsigned int i = 0; i < vcl_GA.size1(); ++i)
for (unsigned int j = 0; j < vcl_GA.size2(); ++j)
vcl_GA(i,j) = randomNumber();
for (unsigned int i = 0; i < vcl_GB.size1(); ++i)
for (unsigned int j = 0; j < vcl_GB.size2(); ++j)
vcl_GB(i,j) = randomNumber();
std::cout << "--- Computing matrix-matrix product using viennacl in GPU ---" << std::endl;
vcl_GC = viennacl::linalg::prod(vcl_GA, vcl_GB);
timer.start();
vcl_GC = viennacl::linalg::prod(vcl_GA, vcl_GB);
viennacl::backend::finish();
exec_timegpu = timer.get();
std::cout << " - Execution time: " << exec_timegpu << std::endl;
std::cout << "result on GPU: "<<viennacl::ocl::current_device().name() << std::endl;
return 0;
}
输出:
--- Computing matrix-matrix product using viennacl in CPU ---
- Execution time: 0.559754
result on CPU: Intel(R) Xeon(R) CPU E3-1225 v5 @ 3.30GHz
--- Computing matrix-matrix product using viennacl in GPU ---
- Execution time: 0.004177
result on GPU: GeForce GTX 1080
注意事项:
* 请确保在头文件中定义VIENNACL_WITH_OPENCL(须在包含任何ViennaCL头文件之前)。
* 为不同的设备创建不同的缓冲区:在OpenCL中,缓冲区与创建它的上下文(及其计算设备)绑定,因此我们不能在两个不同的设备上使用同一个缓冲区。
* 请确保调用viennacl::backend::finish()以等待内核执行完毕,然后再读取计时器。