Question

如果这个问题之前已经有人解答过，我先道歉；不过我已经搜索了一番，到目前为止一无所获。我正在尝试编译一个 CUDA 版的 Hello World，代码是在一个现有示例的基础上稍作修改得到的。我的代码是：

// This is the REAL "hello world" for CUDA!
// It takes the string "Hello ", prints it, then passes it to CUDA with an array
// of offsets. Then the offsets are added in parallel to produce the string "World!"
// By Ingemar Ragnemalm 2010

#include <stdio.h>
#include <stdlib.h>

#include <iostream>

// Number of elements in each of the host/device buffers used by main().
constexpr int N = 16;
// Threads per block used when launching the kernel.
constexpr int blocksize = 16;

// Adds b[i] to a[i] in place, one element per thread.
//
// Launch layout: the element index is threadIdx.x alone (blockIdx is never
// read), so every block in the grid works on the same leading blockDim.x
// elements. There is no bounds check: both arrays must hold at least
// blockDim.x elements.
__global__
void hello(char *a, int *b)
{
    const unsigned int lane = threadIdx.x;
    a[lane] = a[lane] + b[lane];
}

// Host driver: prints "Hello ", reports the CUDA runtime/driver versions, then
// launches hello() to add the offsets in b to the characters in a on the GPU
// and prints the resulting string.
//
// Returns EXIT_SUCCESS when every CUDA call succeeded; otherwise describes the
// failing call on stderr and returns EXIT_FAILURE.
int main()
{
    // hello() indexes with threadIdx.x only and performs no bounds check, so a
    // block must never be wider than the N-element buffers it is handed.
    static_assert(blocksize <= N, "blocksize must not exceed N");

    char a[N] = "Hello \0\0\0\0\0\0";
    int b[N] = {15, 10, 6, 0, -11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

    // Null-initialised so the unconditional cudaFree() calls below never see
    // an indeterminate pointer when an allocation did not happen.
    char *ad = nullptr;
    int *bd = nullptr;
    const int csize = N*sizeof(char);
    const int isize = N*sizeof(int);
    const int numBlocks = (N + blocksize - 1)/blocksize;

    // Reports a failed CUDA call on stderr; returns true when status is success.
    auto check = [](cudaError_t status, const char *what) -> bool {
        if (status == cudaSuccess) {
            return true;
        }
        fprintf(stderr, "%s failed: %s (error %d)\n", what,
                cudaGetErrorString(status), static_cast<int>(status));
        return false;
    };

    printf("%s", a);

    // Query the versions before touching device memory so this diagnostic
    // output is still produced when the allocations below fail.
    int runtime_version = -1;
    auto error_type_runtime = cudaRuntimeGetVersion(&runtime_version);
    int driver_version = -1;
    auto error_type_driver = cudaDriverGetVersion(&driver_version);

    std::cout << "Blocksize: " << blocksize << std::endl;
    std::cout << "NumBlocks: " << numBlocks << std::endl;
    std::cout << "Runtime API: " << runtime_version << std::endl;
    std::cout << "cudaRuntimeGetVersion error type: " << error_type_runtime << std::endl;
    std::cout << "Driver API: " << driver_version << std::endl;
    std::cout << "cudaDriverGetVersion error type: " << error_type_driver << std::endl;

    dim3 dimBlock( blocksize, 1 );
    dim3 dimGrid( numBlocks, 1 );

    // Short-circuits on the first failure so later calls do not run on top of
    // an earlier error.
    bool ok = check(cudaMalloc( (void**)&ad, csize ), "cudaMalloc(ad)")
           && check(cudaMalloc( (void**)&bd, isize ), "cudaMalloc(bd)")
           && check(cudaMemcpy( ad, a, csize, cudaMemcpyHostToDevice ), "cudaMemcpy(ad <- a)")
           && check(cudaMemcpy( bd, b, isize, cudaMemcpyHostToDevice ), "cudaMemcpy(bd <- b)");

    if (ok) {
        hello<<<dimGrid, dimBlock>>>(ad, bd);
        // A launch does not return a status: launch errors are read back with
        // cudaGetLastError(), and faults raised while the kernel runs surface
        // at the blocking device-to-host copy that follows.
        ok = check(cudaGetLastError(), "hello kernel launch")
          && check(cudaMemcpy( a, ad, csize, cudaMemcpyDeviceToHost ), "cudaMemcpy(a <- ad)");
    }

    // Release on every path, success or failure.
    cudaFree( ad );
    cudaFree( bd );

    if (!ok) {
        return EXIT_FAILURE;
    }

    printf("%s\n", a);
    return EXIT_SUCCESS;
}

但我得到的输出是：

$ nvcc cuda_hello_world.cu -arch=sm_20 --std=c++11
nvcc warning : The 'compute_20', 'sm_20', and 'sm_21' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
$ ./a.out
Hello Blocksize: 16
NumBlocks: 1
Runtime API: -1
cudaRuntimeGetVersion error type: 35
Driver API: 0
cudaRuntimeGetVersion error type: 0
Hello

我查了 CUDA 错误 35，它的含义是“已安装的 NVIDIA CUDA 驱动程序比 CUDA 运行时库旧”，但是在运行以下命令之后

$/usr/bin/nvidia-smi

我得到的是 NVIDIA-SMI 375.82，驱动程序版本：375.82（截至 2017 年 7 月 24 日），而

$nvcc --version

的输出为：

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2016 NVIDIA Corporation
Built on Tue_Jan_10_13:22:03_CST_2017
Cuda compilation tools, release 8.0, V8.0.61

所以看起来安装了正确的库/驱动程序，但是nvcc无法找到它们。如果我用-v构建我得到：

$ nvcc cuda_hello_world.cu -arch=sm_20 --std=c++11 -v
nvcc warning : The 'compute_20', 'sm_20', and 'sm_21' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
#$ _SPACE_=
#$ _CUDART_=cudart
#$ _HERE_=/usr/local/cuda-8.0/bin
#$ _THERE_=/usr/local/cuda-8.0/bin
#$ _TARGET_SIZE_=
#$ _TARGET_DIR_=
#$ _TARGET_DIR_=targets/x86_64-linux
#$ TOP=/usr/local/cuda-8.0/bin/..
#$ NVVMIR_LIBRARY_DIR=/usr/local/cuda-8.0/bin/../nvvm/libdevice
#$ LD_LIBRARY_PATH=/usr/local/cuda-8.0/bin/../lib:
#$ PATH=/usr/local/cuda-8.0/bin/../open64/bin:/usr/local/cuda-8.0/bin/../nvvm/bin:/usr/local/cuda-8.0/bin:/home/michael/bin:/home/michael/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games/usr/local/games:/snap/bin:/usr/local/cuda-8.0/bin/:/usr/local/MATLAB/R2016b/bin/
#$ INCLUDES="-I/usr/local/cuda-8.0/bin/../targets/x86_64-linux/include"
#$ LIBRARIES=  "-L/usr/local/cuda-8.0/bin/../targets/x86_64-linux/lib/stubs" "-L/usr/local/cuda-8.0/bin/../targets/x86_64-linux/lib"

我是否因为不包含正确的库而犯了一个愚蠢的错误，或者这里发生了什么完全不同的事情？

Answer 1

如果其他人也遇到了这个问题：我已经把它解决了。事实证明，只需更新/升级所有内容（包括 NVIDIA 驱动程序/库）即可解决问题。

CUDA hello_world未运行

1 个答案: