我尝试编译并运行以下取自 cuBLAS 指南的代码。
//Example 2. Application Using C and CUBLAS: 0-based indexing
//-----------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda_runtime.h>
#include "cublas_v2.h"
#define M 6
#define N 5
#define IDX2C(i,j,ld) (((j)*(ld))+(i))
/*
 * Scale one row segment and one column segment of a column-major matrix
 * in place using cublasSscal.
 *
 *   handle : cuBLAS handle used for both calls.
 *   m      : base pointer of the matrix (main passes a cudaMalloc'd buffer).
 *   ldm    : leading dimension, i.e. the distance in elements between
 *            consecutive columns; also used here as the row count.
 *   n      : number of columns.
 *   p, q   : 0-based row and column of the starting element.
 *   alpha  : factor applied to row p, columns q .. n-1.
 *   beta   : factor applied to column q, rows p .. ldm-1.
 *
 * Both calls start at element (p, q), so that element ends up scaled by
 * alpha and then by beta. The cublasStatus_t results are discarded because
 * the function returns void.
 */
static __inline__ void modify(cublasHandle_t handle, float *m, int ldm, int n, int p, int q, float alpha, float beta) {
    /* Row p from column q onward: n - q elements, one every ldm floats.
     * (Using n - p here would step past the last column whenever p < q.) */
    cublasSscal(handle, n - q, &alpha, &m[IDX2C(p, q, ldm)], ldm);
    /* Column q from row p downward: ldm - p contiguous elements. */
    cublasSscal(handle, ldm - p, &beta, &m[IDX2C(p, q, ldm)], 1);
}
/*
 * Fill an M x N column-major host matrix, copy it to the device, let
 * modify() scale part of it through cuBLAS, copy it back and print it.
 *
 * Returns EXIT_SUCCESS when every step succeeds, EXIT_FAILURE otherwise.
 * Every early return releases whatever has been acquired up to that point
 * (host buffer, device buffer, cuBLAS handle).
 */
int main(void) {
    cudaError_t cudaStat;
    cublasStatus_t stat;
    cublasHandle_t handle;
    int i, j;
    float* devPtrA;
    float* a = 0;

    /* Host copy of the matrix. */
    a = (float *)malloc(M * N * sizeof(*a));
    if (!a) {
        printf("host memory allocation failed");
        return EXIT_FAILURE;
    }

    /* Column-major fill: element (i, j) lives at a[j * M + i]. */
    for (j = 0; j < N; j++) {
        for (i = 0; i < M; i++) {
            a[IDX2C(i, j, M)] = (float)(i * M + j + 1);
        }
    }

    /* Device buffer of the same size. */
    cudaStat = cudaMalloc((void**)&devPtrA, M*N * sizeof(*a));
    if (cudaStat != cudaSuccess) {
        printf("device memory allocation failed");
        free(a);
        return EXIT_FAILURE;
    }

    stat = cublasCreate(&handle);
    if (stat != CUBLAS_STATUS_SUCCESS) {
        printf("CUBLAS initialization failed\n");
        cudaFree(devPtrA);
        free(a);
        return EXIT_FAILURE;
    }

    /* Host -> device copy of the whole matrix. */
    stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
    if (stat != CUBLAS_STATUS_SUCCESS) {
        printf("data download failed");
        cudaFree(devPtrA);
        cublasDestroy(handle);
        free(a);
        return EXIT_FAILURE;
    }

    /* Scale row 1 (columns 2..N-1) by 16 and column 2 (rows 1..M-1) by 12. */
    modify(handle, devPtrA, M, N, 1, 2, 16.0f, 12.0f);

    /* Device -> host copy of the result. */
    stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
    if (stat != CUBLAS_STATUS_SUCCESS) {
        printf("data upload failed");
        cudaFree(devPtrA);
        cublasDestroy(handle);
        free(a);
        return EXIT_FAILURE;
    }

    /* Device resources are no longer needed once the result is on the host. */
    cudaFree(devPtrA);
    cublasDestroy(handle);

    /* The outer loop walks columns, so each printed line is one matrix column. */
    for (j = 0; j < N; j++) {
        for (i = 0; i < M; i++) {
            printf("%7.0f", a[IDX2C(i, j, M)]);
        }
        printf("\n");
    }

    free(a);
    return EXIT_SUCCESS;
}
我遇到了一些"未解析的符号"(unresolved symbol)错误。我已经检查过链接器设置和头文件包含,它们看起来都没有问题,可能是什么原因?我猜问题出在所包含的 cublas_v2.h 上;我也试过把它换成普通的 cublas.h,但仍然无法正常工作。
未解析的符号为:cublasCreate_v2、cublasDestroy_v2、cublasSetMatrix、cublasGetMatrix、cublasSscal_v2。