我有以下C代码:
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
double enorm(double* v1, int length){
cublasHandle_t handle;
double result = 0;
double* vector;
cudaMalloc((void**) &vector, length * sizeof(double));
cublasSetVector(length, sizeof(double), v1, 1, vector,1);
cublasCreate(&handle);
cublasDnrm2(handle, length, vector, 1, &result);
cudaFree(vector);
return result;
}
double testnorm(double* v1, int len){
double tmp = 0;
for(int i = 0; i < len; i++){
tmp += v1[i]*v1[i];
}
return sqrt(tmp);
}
/*
 * Prints the norm of the vector (3, 4) computed first by the cuBLAS-backed
 * enorm and then by the CPU reference testnorm, one value per line.
 */
int main(void) {
    /* A fixed two-element vector needs no heap allocation, so there is
       nothing to NULL-check and nothing to leak. */
    double a[2] = {3, 4};

    printf("%.f\n", enorm(a, 2));
    printf("%.f\n", testnorm(a,2));
    return 0;
}
以下是调用上述C函数的Haskell代码:
import qualified Foreign.Ptr as P
import System.IO.Unsafe
import Foreign.C.Types
import qualified Data.Vector.Storable as SV
import Foreign.C.Types
-- Bindings to the C functions @enorm@ and @testnorm@. Each takes a pointer to
-- the first element of an array of doubles and an element count, and returns
-- a double.
-- NOTE(review): both are imported as pure functions (no IO in the result
-- type), so the call runs only when its result is demanded. Callers must
-- force the result while the pointer is still valid.
-- NOTE(review): the C @enorm@ allocates GPU memory and copies data, so an
-- @IO CDouble@ result type may be more appropriate -- confirm before changing.
foreign import ccall "enorm" c_enorm :: P.Ptr CDouble -> CInt -> CDouble
foreign import ccall "testnorm" c_testnorm :: P.Ptr CDouble -> CInt -> CDouble
-- | Euclidean norm of a storable vector, computed by the C function @enorm@.
--
-- The foreign call is forced with '$!' inside 'SV.unsafeWith'. Returning the
-- unevaluated application would let the call run lazily after 'SV.unsafeWith'
-- has finished, when the pointer is no longer guaranteed to be valid.
enorm :: SV.Vector CDouble -> CDouble
enorm v1 = unsafePerformIO $ do
    let len = fromIntegral $ SV.length v1
    SV.unsafeWith v1 $ \ptr -> return $! c_enorm ptr len
-- | Euclidean norm of a storable vector, computed by the C function
-- @testnorm@.
--
-- The foreign call is forced with '$!' inside 'SV.unsafeWith'. Returning the
-- unevaluated application would let the call run lazily after 'SV.unsafeWith'
-- has finished, when the pointer is no longer guaranteed to be valid.
testnorm :: SV.Vector CDouble -> CDouble
testnorm v1 = unsafePerformIO $ do
    let len = fromIntegral $ SV.length v1
    SV.unsafeWith v1 $ \ptr -> return $! c_testnorm ptr len
-- | Prints the norm of the vector [3,4] as computed by 'enorm' and then by
-- 'testnorm', one result per line.
main :: IO ()
main = mapM_ (print . ($ sample)) [enorm, testnorm]
  where
    -- The same two-element input is fed to both implementations.
    sample :: SV.Vector CDouble
    sample = SV.fromList [3,4]
出于某种原因,C程序如预期输出5和5,而等效的Haskell程序却输出0和5。这说明数组虽然已成功传递给C函数,但将数据复制到GPU的过程出了问题。这种行为的原因是什么?怎样才能得到预期的结果?
C编译为:
# Build the standalone C test program from blas.c, linking against cuBLAS,
# the CUDA runtime and libm.
gcc -o cmain blas.c -I/opt/cuda/include -L/opt/cuda/lib64 -lcublas -lcudart -lm
Haskell中:
# Compile blas.c to position-independent object code. Nothing is linked at
# this step, so only the include path is needed.
gcc -c -fPIC -o testblas.o blas.c -I/opt/cuda/include
# Link the shared library against the CUDA libraries it calls, so that its
# dependencies are recorded in the .so itself.
gcc -shared -o libtestblas.so testblas.o -L/opt/cuda/lib64 -lcublas -lcudart -lm
# Build the Haskell executable against the wrapper library.
ghc -o main Main.hs -I/opt/cuda/include -L/opt/cuda/lib64 -lcublas -lcudart -lm -L./ -ltestblas
# Prepend the current directory to the library search path instead of
# replacing whatever was already set.
export LD_LIBRARY_PATH=./${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}; ./main
编辑:从Haskell调用时,cudaMalloc返回cudaErrorMemoryAllocation(无法分配足够的内存),而在C程序中,所有调用都成功返回。