我正在尝试使用cuBLAS函数cublasSgemm编写一个简单的矩阵乘法示例。我的代码如下所示:
int m =100, n = 100;
float * bold1 = new float [m*n];
float * bold2 = new float [m*n];
float * bold3 = new float [m*n];
for (int i = 0; i< m; i++)
for(int j = 0; j <n;j++)
{
bold1[i*n+j]=rand()%10;
bold2[i*n+j]=rand()%10;
}
cudaError_t cudaStat;
cublasStatus_t stat;
cublasHandle_t handle;
const float alpha = 1.0;
const float beta = 0;
float * dev_bold1, * dev_bold2, *dev_bold3;
cudaStat = cudaMalloc ((void**)&bold1, sizeof(float)*m*n);
if(cudaStat != CUBLAS_STATUS_SUCCESS)
{
cout<<"problem1";
return cudaStat;
}
cudaStat = cudaMalloc ((void**)&bold2,sizeof(float)*m*n);
if(cudaStat != CUBLAS_STATUS_SUCCESS)
{
cout<<"problem2";
return cudaStat;
}
cudaStat = cudaMalloc ((void**)&bold3,sizeof(float)*m*n);
if(cudaStat != CUBLAS_STATUS_SUCCESS)
{
cout<<"problem3";
return cudaStat;
}
cublasSetMatrix(m,n,sizeof(float),bold1,m,dev_bold1,m);
cublasSetMatrix(m,n,sizeof(float),bold2,m,dev_bold2,m);
stat = cublasCreate(&handle);
if(stat != CUBLAS_STATUS_SUCCESS)
{
cout<<"problem4";
return stat;
}
cout<<stat<<" "<<CUBLAS_STATUS_SUCCESS<<"\n";
stat = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, m, n ,&alpha, dev_bold1, n, dev_bold2, n, &beta,dev_bold3,m);
if (stat != CUBLAS_STATUS_SUCCESS)
{
cout<<"problem5";
return stat;
}
cudaStat = cudaMemcpy(bold3,dev_bold3,sizeof(float)*m*n,cudaMemcpyDeviceToHost);
if (cudaStat != cudaSuccess)
{
cout<<"problem6";
return cudaStat;
}
delete []bold1;
delete []bold2;
cudaFree(dev_bold1);
cudaFree(dev_bold2);
cudaFree(dev_bold3);
在这段代码中,我想将用随机数填充的矩阵bold1和bold2相乘。代码输出了"problem5",这与代码的这一部分有关:
stat = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, m, n ,&alpha, dev_bold1, n, dev_bold2, n, &beta,dev_bold3,m);
if (stat != CUBLAS_STATUS_SUCCESS)
{
cout<<"problem5";
return stat;
}
我还打印了stat的值,它显示为"13"!
有人可以帮我理解我的代码有什么问题吗? 谢谢!
答案 0 :(得分:1)
主要的错误出在你的cudaMalloc语句中:你把设备内存分配给了错误的指针(主机指针bold1,而不是设备指针dev_bold1):
float * dev_bold1, * dev_bold2, *dev_bold3;
cudaStat = cudaMalloc ((void**)&bold1, sizeof(float)*m*n);
^
|
this should be dev_bold1
其他2个cudaMalloc语句也存在同样的问题。
以下代码修复了这些错误,并且不返回运行时错误:
$ cat t1235.cu
#include <cublas_v2.h>
#include <iostream>
using namespace std;
int main(){
int m =100, n = 100;
float * bold1 = new float [m*n];
float * bold2 = new float [m*n];
float * bold3 = new float [m*n];
for (int i = 0; i< m; i++)
for(int j = 0; j <n;j++)
{
bold1[i*n+j]=rand()%10;
bold2[i*n+j]=rand()%10;
}
cudaError_t cudaStat;
cublasStatus_t stat;
cublasHandle_t handle;
const float alpha = 1.0;
const float beta = 0;
float * dev_bold1, * dev_bold2, *dev_bold3;
cudaStat = cudaMalloc ((void**)&dev_bold1, sizeof(float)*m*n);
if(cudaStat != cudaSuccess)
{
cout<<"problem1";
return cudaStat;
}
cudaStat = cudaMalloc ((void**)&dev_bold2,sizeof(float)*m*n);
if(cudaStat != cudaSuccess)
{
cout<<"problem2";
return cudaStat;
}
cudaStat = cudaMalloc ((void**)&dev_bold3,sizeof(float)*m*n);
if(cudaStat != cudaSuccess)
{
cout<<"problem3";
return cudaStat;
}
cublasSetMatrix(m,n,sizeof(float),bold1,m,dev_bold1,m);
cublasSetMatrix(m,n,sizeof(float),bold2,m,dev_bold2,m);
stat = cublasCreate(&handle);
if(stat != CUBLAS_STATUS_SUCCESS)
{
cout<<"problem4";
return stat;
}
cout<<stat<<" "<<CUBLAS_STATUS_SUCCESS<<"\n";
stat = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, m, n ,&alpha, dev_bold1, n, dev_bold2, n, &beta,dev_bold3,m);
if (stat != CUBLAS_STATUS_SUCCESS)
{
cout<<"problem5";
return stat;
}
cudaStat = cudaMemcpy(bold3,dev_bold3,sizeof(float)*m*n,cudaMemcpyDeviceToHost);
if (cudaStat != cudaSuccess)
{
cout<<"problem6";
return cudaStat;
}
delete []bold1;
delete []bold2;
cudaFree(dev_bold1);
cudaFree(dev_bold2);
cudaFree(dev_bold3);
return 0;
}
$ nvcc -o t1235 t1235.cu -lcublas
$ cuda-memcheck ./t1235
========= CUDA-MEMCHECK
0 0
========= ERROR SUMMARY: 0 errors
$
我还修改了几处错误检查语句:您原来把CUDA的错误返回值(cudaError_t)与cuBLAS的状态常量(CUBLAS_STATUS_SUCCESS)作比较,现在改为与cudaSuccess比较。