我搜遍了全世界但无法解决这个问题! “cusolver test.exe中0x00007FFF3AD3D430(cusolver64_70.dll)的未处理异常:0xC0000005:访问冲突读取位置0x0000000400960004。” 我想用最小二乘解算器或Qr方法解决Ax = B ....我的代码编译没有错误但后来我得到这个错误! 代码在此代码的最后一行中断! 我的代码是
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cublas.h>
#include <cusolver_common.h>
#include <cusolverSp.h>
#include <cusparse.h>
#include "device_launch_parameters.h"
#include <cuda_runtime.h>
# include <memory.h>
# include <thrust/device_vector.h>
# include <thrust/host_vector.h>
# include <thrust/device_ptr.h>
# include <thrust/system/cuda/execution_policy.h>
double *dX, *X;
double *dY, *Y;
int NoOfBuses = 4;
void main(void)
{
cusparseStatus_t status;
int row;
double *matA, *d_matA;
size_t pitchd_matA;
int *dNnzPerRow;
double *dCsrValA, *H_CsrVal;
int *dCsrRowPtrA, *HCsrRowPtrA;
int *dCsrColIndA, *HCsrColIndA;
int totalNnz;
cusparseHandle_t handle = 0;
cusparseMatDescr_t descr = 0;
//---------------------------------------------------------------------------------------------------------------
matA = (double *)calloc(((NoOfBuses+1)*(NoOfBuses+1)), sizeof(double));
Y = (double *)calloc((NoOfBuses + 1), sizeof(double));
X = (double *)calloc((NoOfBuses + 1), sizeof(double));
//STTORING IN col MAJOR FORM
for (int Row = 1; Row <= NoOfBuses; Row++)
{
double value = 1;
for (int Col = 1; Col <= NoOfBuses; Col++)
{
matA[Row + Col*(NoOfBuses + 1)] = value;
value++;
}
}
double value = 1;
for (int index = 1; index <= NoOfBuses; index++)
{
Y[index] = value;
value++;
}
printf("\n");
printf("A matrix\n");
for (int Row = 0; Row <= NoOfBuses; Row++)
{
for (int Col = 0; Col <= NoOfBuses; Col++)
{
printf("%f\t",matA[Col + Row*(NoOfBuses + 1)] );
}
printf("\n");
}
printf("Y matrix\n\n");
for (int index = 0; index <= NoOfBuses; index++)
{
printf("%f\n",Y[index]);
}
//-------------------------------------------------------------------------------------------------------
cusparseCreate(&handle);
// Allocate device memory to store the sparse CSR representation of A
cudaMalloc((void **)&dCsrValA, sizeof(double)* 16 );
cudaMalloc((void **)&dCsrColIndA, sizeof(int)* 16);
cudaMalloc((void **)&dCsrRowPtrA, sizeof(int)* (NoOfBuses + 2));
// Allocate device memory for vectors and the dense form of the matrix A
//cudaMallocPitch((void **) &d_matA, &pitchd_matA, sizeof(double)*(NoOfBuses + 1), (NoOfBuses + 1));
cudaMalloc((void **)&d_matA, sizeof(double)* (NoOfBuses + 1)*(NoOfBuses + 1));
cudaMalloc((void **)&dNnzPerRow, sizeof(int)* NoOfBuses);
//transfer Matrix A,X & Y to the GPU
cudaMemcpy(dX, X, sizeof(double) * (NoOfBuses +1), cudaMemcpyHostToDevice);
cudaMemcpy(dY, Y, sizeof(double)* (NoOfBuses + 1), cudaMemcpyHostToDevice);
//cudaMemcpy2D(d_matA, pitchd_matA, matA, sizeof(double)*(NoOfBuses + 1), sizeof(double)*(NoOfBuses + 1), (NoOfBuses + 1), cudaMemcpyHostToDevice);
cudaMemcpy(d_matA,matA,sizeof(double) * (NoOfBuses + 1)*(NoOfBuses + 1), cudaMemcpyHostToDevice);
// Construct a descriptor of the matrix A
cusparseCreateMatDescr(&descr);
cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL);
cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO);
cusparseSetMatDiagType(descr, CUSPARSE_DIAG_TYPE_NON_UNIT);
//cusparseSetMatFillMode(descr, CUSPARSE_FILL_MODE_LOWER);
cusparseDnnz(handle, CUSPARSE_DIRECTION_COLUMN, (NoOfBuses+1), (NoOfBuses+1), descr, d_matA, (NoOfBuses+1), dNnzPerRow, &totalNnz);
printf("the total number of Non zero elements are = %d",totalNnz);
cusparseDdense2csr(handle, (NoOfBuses+1), (NoOfBuses+1), descr, d_matA, (NoOfBuses+1), dNnzPerRow, dCsrValA, dCsrRowPtrA, dCsrColIndA);
cudaMalloc((void **)&dX, sizeof(double)* (NoOfBuses + 1));
cudaMalloc((void **)&dY, sizeof(double)* (NoOfBuses + 1));
H_CsrVal = (double *)calloc((totalNnz), sizeof(double));
HCsrRowPtrA = (int *)calloc((NoOfBuses + 2), sizeof(int));
HCsrColIndA = (int *)calloc((totalNnz), sizeof(int));
cudaMemcpy(H_CsrVal, dCsrValA, sizeof(double) * (totalNnz), cudaMemcpyDeviceToHost);
cudaMemcpy(HCsrRowPtrA, dCsrRowPtrA, sizeof(int)* (NoOfBuses + 2), cudaMemcpyDeviceToHost);
cudaMemcpy(HCsrColIndA, dCsrColIndA, sizeof(int)* (totalNnz), cudaMemcpyDeviceToHost);
printf("Values\n\n");
for (int index = 0; index <= (totalNnz-1); index++)
{
printf("%f\n", H_CsrVal[index]);
}
printf("\ncol pointer matrix\n\n");
for (int index = 0; index <= (totalNnz - 1); index++)
{
printf("%d\n", HCsrColIndA[index]);
}
printf("\nrow ofssett pointer matrix\n\n");
for (int index = 0; index <= (NoOfBuses +2 - 1); index++)
{
printf("%d\n", HCsrRowPtrA[index]);
}
//------------------------------------------------------------------------------------
cusolverSpHandle_t handleSolver;
double tol = 0.0000001;
int reorder = 0;
int valuefor,*singularity = &valuefor;
*singularity = 0;
cudaStream_t streamId = NULL;
cusolverStatus_t Checker;
Checker=cusolverSpCreate(&handleSolver);
cusolverStatus_t cudasu = cusolverSpSetStream(handleSolver, streamId);
cusolverStatus_t pakao = cusolverSpDcsrlsvluHost(handleSolver,5, totalNnz, descr, dCsrValA, dCsrRowPtrA, dCsrColIndA, dY, tol, reorder, dX, singularity);
getchar();
}
答案 0 :(得分:1)
您正在使用API的Host
版本,但您正在将设备变量传递给它:
cudaMalloc((void **)&dCsrValA, sizeof(double)* 16 );
...
cusolverStatus_t pakao = cusolverSpDcsrlsvluHost(handleSolver,5, totalNnz, descr, dCsrValA, dCsrRowPtrA, dCsrColIndA, dY, tol, reorder, dX, singularity);
^^^^ ^^
参考cusolver documentation:
我们看到,对于主机路径,所有变量必须位于主机上,而不是设备上。