使用CUDA常量内存时cuSPARSE返回状态映射错误(CUSPARSE_STATUS_MAPPING_ERROR)

时间:2016-04-25 20:32:42

标签: cuda

我正在使用CUDA的cuSPARSE库处理稀疏矩阵,需要执行矩阵-向量乘法(cusparseDcsrmv函数)。我有一个CSR格式的稀疏矩阵d_A,当我用分配在全局设备内存中的向量d_x调用该函数时,一切正常。但是当我改用驻留在设备常量内存中的向量时,却得到了一个错误:CUSPARSE_STATUS_MAPPING_ERROR。文档说,通常的解决办法是解除之前绑定的所有纹理,但这与我的做法并不相关。

有谁知道发生了什么?

// Number of elements in the constant-memory vector; also used as the row
// count m of the matrix in main().
const int ONES_SIZE = 5400;
// Vector stored in device constant memory (__constant__); main() fills it
// with 1.0 via cudaMemcpyToSymbol and passes it as x to cusparseDcsrmv.
__constant__ static double ONES_DEV[ONES_SIZE];

// Direction argument passed to cusparseDnnz (count non-zeros per row).
const cusparseDirection_t dirA_row = CUSPARSE_DIRECTION_ROW;
// Operation argument passed to cusparseDcsrmv (use A without transposing).
const cusparseOperation_t NON_TRANS = CUSPARSE_OPERATION_NON_TRANSPOSE;

// Minimal reproduction of the reported CUSPARSE_STATUS_MAPPING_ERROR:
//   1. fill the __constant__ vector ONES_DEV with ones,
//   2. build a dense m x n matrix on the host and copy it to the device,
//   3. convert it to CSR with cusparseDnnz + cusparseDdense2csr,
//   4. call cusparseDcsrmv with ONES_DEV as the input vector x.
//
// NOTE(review): none of the CUDA / cuSPARSE return codes are checked here;
// when adapting this snippet, check every cudaError_t / cusparseStatus_t so
// the first failing call is the one that gets reported.
int main(){
    cudaSetDevice(0);

    int m = ONES_SIZE; int n = 2500;
    double * HOST_ONES, *A, *d_A, *d_result;
    HOST_ONES = (double*) malloc(ONES_SIZE*sizeof(double));
    for (int i=0; i<ONES_SIZE; i++)
        HOST_ONES[i] = 1.0;
    // Upload the host vector of ones into the constant-memory symbol.
    cudaMemcpyToSymbol(ONES_DEV, HOST_ONES, ONES_SIZE*sizeof(double), 0, cudaMemcpyHostToDevice);

    // Dense m x n host matrix, zero-initialised by calloc.
    A = (double *) calloc(m*n, sizeof(double));
    // populate matrix A
    // Sets every second entry among the first 2000 elements, i.e. 1000 non-zeros.
    for(int i=0;i<1000; i++)
        A[i*2] = 1.5;

    cudaMalloc((void**)&d_A, m*n*sizeof(double));
    cudaMemcpy(d_A, A, m*n*sizeof(double), cudaMemcpyHostToDevice);

    cusparseHandle_t cusparse_handle = 0;
    cusparseMatDescr_t descrA=0;
    int *nnzTotal, *nnzPerRow, *csrRowPtrA, *csrColIndA;
    double* csrValA;
    int lda = m;  // leading dimension of the dense matrix passed to cuSPARSE
    const double positive = 1.0;  // alpha for csrmv
    const double zero = 0.0;      // beta for csrmv

    cusparseCreate(&cusparse_handle);
    cusparseCreateMatDescr(&descrA);
    cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO);

    // nnzTotal lives on the host, nnzPerRow on the device.
    nnzTotal = (int*)malloc(sizeof(int));
    cudaMalloc((void**)&nnzPerRow, m*sizeof(int));
    cusparseDnnz(cusparse_handle, dirA_row, m, n, descrA, d_A, lda, nnzPerRow, nnzTotal);

    cudaMalloc((void**)&csrValA, (*nnzTotal)*sizeof(double));
    cudaMalloc((void**)&csrRowPtrA, (m+1)*sizeof(int));
    cudaMalloc((void**)&csrColIndA, (*nnzTotal)*sizeof(int));
    // csrmv computes y = alpha*op(A)*x + beta*y. With a non-transposed m x n
    // matrix the result y has m elements, so the output buffer must hold m
    // doubles (sizing it with n would be too small, since n < m here).
    cudaMalloc((void**)&d_result, m*sizeof(double));

    // MATRIX CONVERSE FROM DENSE TO SPARSE
    cusparseDdense2csr(cusparse_handle, m, n, descrA, d_A, lda, nnzPerRow, csrValA, csrRowPtrA, csrColIndA);

    // MATRIX VECTOR MULTIPLICATION
    // This is the call reported to return CUSPARSE_STATUS_MAPPING_ERROR; the
    // ONES_DEV argument is intentionally kept as in the question.
    // NOTE(review): ONES_DEV is passed by symbol name from host code. The CUDA
    // runtime provides cudaGetSymbolAddress to obtain a device pointer for a
    // __constant__ symbol; an x vector allocated with cudaMalloc is the form
    // the question reports as working.
    cusparseDcsrmv(cusparse_handle, NON_TRANS, m, n, *nnzTotal, &positive, descrA, csrValA, csrRowPtrA, csrColIndA, ONES_DEV, &zero, d_result);

1 个答案:

答案 0 :(得分:1)

我用Visual Profiler运行了你的示例代码,同时跟踪API调用,得到如下结果:

Visual profiler snapshot

cuSPARSE函数内部调用了cudaBindTexture。常量内存是一种特殊类型的内存,似乎无法将纹理绑定到它上面,尽管文档中并没有说明这一点。