Qt Creator 32位与CUDA V5

时间:2013-04-29 14:22:10

标签: qt cuda qt-creator

我正在尝试在发布(release)模式下用 Qt Creator 编译一段简单的 CUDA 代码。我的 .pro 文件如下:

# qmake project for a console Qt application (QtCore only, no GUI module) that is
# meant to be built together with one CUDA source file, vectorAddition.cu.
TEMPLATE = app
CONFIG += console

QT       += core
CONFIG   -= app_bundle
QT       -= gui


# Define output directories
DESTDIR = release
OBJECTS_DIR = obj
# NOTE(review): the value below is the literal text "OBJECTS_DIR/cuda"; there is no
# "$$" expansion, so "$$OBJECTS_DIR/cuda" was probably intended - confirm.
CUDA_OBJECTS_DIR = OBJECTS_DIR/cuda


SOURCES += main.cpp 

# This makes the .cu files appear in your project
# (OTHER_FILES only lists the file; nothing in this line asks for it to be compiled.)
OTHER_FILES +=  vectorAddition.cu


# CUDA settings <-- may change depending on your system

CUDA_SDK = "C:/ProgramData/NVIDIA Corporation/CUDA Samples/v5.0"   # Path to cuda SDK install
CUDA_DIR = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v5.0"            # Path to cuda toolkit install
SYSTEM_NAME = Win32         # Depending on your system either 'Win32', 'x64', or 'Win64'
SYSTEM_TYPE = 32            # '32' or '64', depending on your system
CUDA_ARCH += sm_20          # Type of CUDA architecture, for example 'compute_10', 'compute_11', 'sm_10'



# Extra nvcc flags. Only the debug command further down references these two
# variables; the release command does not use them (nor CUDA_ARCH).
NVCC_OPTIONS = --use_fast_math
NVCCFLAGS    = --compiler-options -fno-strict-aliasing -use_fast_math --ptxas-options=-v

# include paths
INCLUDEPATH += $$CUDA_DIR/include \
               $$CUDA_SDK/common/inc

# library directories
QMAKE_LIBDIR += $$CUDA_DIR/lib/$$SYSTEM_NAME \
                $$CUDA_SDK/common/lib/$$SYSTEM_NAME 

# Add the necessary libraries
LIBS += -lcuda -lcudart



# The following makes sure all path names (which often include spaces) are put between quotation marks
# Result: every INCLUDEPATH entry rendered as -I"<dir>", separated by spaces.
CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')

# Configuration of the Cuda compiler
# Each branch describes an extra compiler (input, output, command line) and adds
# it to QMAKE_EXTRA_COMPILERS.
CONFIG(debug, debug|release) {
   # debug mode
    # NOTE(review): qmake documents <compiler>.input as the NAME of a variable that
    # lists the input files, not a file name. No variable called "vectorAddition.cu"
    # is defined in this file, so this compiler presumably never runs - verify.
    cuda.input =vectorAddition.cu
    cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.obj
    # NOTE(review): "-c" appears twice, and there is no space between
    # ${QMAKE_FILE_OUT} and ${QMAKE_FILE_NAME}, so the -o argument and the input
    # file name run together into a single token.
    cuda.commands = $$CUDA_DIR/bin/nvcc.exe -D_DEBUG -m32 -g -G   -c $$NVCCFLAGS $$NVCC_OPTIONS $$CUDA_INC $$LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT}${QMAKE_FILE_NAME}
    cuda.dependency_type = TYPE_C
    # NOTE(review): the settings above were stored under "cuda", but the name
    # registered here is "cuda_d", which is not defined anywhere in this file.
    QMAKE_EXTRA_COMPILERS += cuda_d
}
else {
       # Release mode
    # NOTE(review): same .input concern as in the debug branch.
    cuda.input    = vectorAddition.cu
    # The release object uses the ".o" suffix while the debug branch uses ".obj".
    cuda.output   = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
    # NOTE(review): ${QMAKE_FILE_OUT} and ${QMAKE_FILE_NAME} are again not separated
    # by a space; no -arch or NVCC option variables are passed here.
    cuda.commands = $$CUDA_DIR/bin/nvcc.exe $$CUDA_INC  $$LIBS --machine $$SYSTEM_TYPE -c -o ${QMAKE_FILE_OUT}${QMAKE_FILE_NAME}
    cuda.dependency_type = TYPE_C
    QMAKE_EXTRA_COMPILERS += cuda
    }

vectorAddition.cu

#include <cstddef>
#include <cstdio>
#include <cuda_runtime.h>

// Element-wise vector addition kernel: c[i] = a[i] + b[i] for every i in [0, n).
//
// Launch layout: 1-D grid of 1-D blocks providing at least n threads in total.
// Threads whose global index is >= n do nothing, so the grid may overshoot n.
// a, b and c are dereferenced on the device and must each hold at least n floats.
// Uses no shared memory.
extern "C"
__global__ void vectorAdditionCUDA(const float* a, const float* b, float* c, int n)
{
    int ii = blockDim.x * blockIdx.x + threadIdx.x;
    if (ii < n)
        c[ii] = a[ii] + b[ii];
}

// Returns true when `status` is cudaSuccess; otherwise reports the failed step
// `what` on stderr and returns false.
static bool cudaSucceeded(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "vectorAddition: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Host entry point: computes c[i] = a[i] + b[i] for i in [0, n) on the GPU.
//
// a, b : host arrays of n floats (inputs)
// c    : host array of n floats (output)
// n    : number of elements; n <= 0 is a no-op
//
// Declared extern "C" so that the symbol matches callers that declare it as
// `extern "C" void vectorAddition(...)`; without it the function gets a C++
// mangled name and such callers fail to link.
//
// Any CUDA failure is reported on stderr, the remaining steps are skipped and the
// device buffers are released; the contents of c are unspecified in that case.
extern "C" void vectorAddition(const float* a, const float* b, float* c, int n)
{
    // Nothing to add; this also avoids launching a grid with zero blocks.
    if (n <= 0)
        return;

    float* a_cuda = NULL;
    float* b_cuda = NULL;
    float* c_cuda = NULL;

    const size_t nBytes          = sizeof(float) * static_cast<size_t>(n);
    const int    threadsPerBlock = 256;
    // Ceiling division written so that it cannot overflow int for large n (n > 0 here).
    const int    blocksPerGrid   = (n - 1) / threadsPerBlock + 1;

    // Allocate the device buffers and upload the inputs; && stops at the first failure.
    bool ok = cudaSucceeded(cudaMalloc((void**)&a_cuda, nBytes), "cudaMalloc(a)")
           && cudaSucceeded(cudaMalloc((void**)&b_cuda, nBytes), "cudaMalloc(b)")
           && cudaSucceeded(cudaMalloc((void**)&c_cuda, nBytes), "cudaMalloc(c)")
           && cudaSucceeded(cudaMemcpy(a_cuda, a, nBytes, cudaMemcpyHostToDevice), "cudaMemcpy(a)")
           && cudaSucceeded(cudaMemcpy(b_cuda, b, nBytes, cudaMemcpyHostToDevice), "cudaMemcpy(b)");

    if (ok) {
        vectorAdditionCUDA<<<blocksPerGrid, threadsPerBlock>>>(a_cuda, b_cuda, c_cuda, n);

        // A launch does not return a status itself: cudaGetLastError() reports
        // configuration errors, and the blocking copy below reports errors raised
        // while the kernel was executing.
        ok = cudaSucceeded(cudaGetLastError(), "kernel launch")
          && cudaSucceeded(cudaMemcpy(c, c_cuda, nBytes, cudaMemcpyDeviceToHost), "cudaMemcpy(c)");
    }

    // Pointers that were never allocated are still NULL, which cudaFree accepts.
    cudaFree(a_cuda);
    cudaFree(b_cuda);
    cudaFree(c_cuda);
}

main.cpp

#include <iostream>
#include <QtCore/QCoreApplication>
#include <QDebug>
#include <cuda.h>
using namespace std;

extern"C" void vectorAddition(const float* a, const float* b, float* c, int n);

// Writes the first n elements of `a` to the Qt debug output as one line of the
// form "(x0, x1, ..., x{n-1})".
//
// a : array holding at least n floats (not read when n == 0)
// n : number of elements to print; n == 0 prints "()"
void printArray(const float* a, const unsigned int n) {
    QString s = "(";
    // Iterate over [0, n) directly: computing n - 1 on an unsigned n would wrap
    // around for n == 0 and send the loop far past the end of the array.
    for (unsigned int ii = 0; ii < n; ++ii) {
        if (ii != 0)
            s.append(", ");
        s.append(QString::number(a[ii]));
    }
    s.append(")");
    qDebug() << s;
}

int main()
{      
    cout << "Hello World!:" <<endl;
    int  deviceCount = 0;
    int  cudaDevice = 0;
    char cudaDeviceName [100];

    unsigned int N = 50;
    float *a, *b, *c;

    cuInit(0);
    cuDeviceGetCount(&deviceCount);
    cuDeviceGet(&cudaDevice, 0);

    cuDeviceGetName(cudaDeviceName, 100, cudaDevice);
    qDebug() << "Number of devices: " <<  deviceCount;
    qDebug() << "Device name:"        <<  cudaDeviceName;

    a = new float [N];    b = new float [N];    c = new float [N];
    for (unsigned int ii = 0; ii < N; ++ii) {
        a[ii] = qrand();
        b[ii] = qrand();
    }
    // This is the function call in which the kernel is called
     vectorAddition(a,b,c,N);

    qDebug() << "input a:"; printArray(a, N);
    qDebug() << "input b:"; printArray(b, N);
    qDebug() << "output c:"; printArray(c, N);

    if (a) delete a;
    if (b) delete b;
    if (c) delete c;

    return 0;
}

当我尝试编译时,我得到一个“对 'vectorAddition' 的未定义引用”(undefined reference)错误。我认为问题在于 Qt Creator 没有把 vectorAddition.cu 文件加入到项目的构建中,即使我已经把它添加到了 OTHER_FILES。

1 个答案:

答案 0 :(得分:1)

我认为您漏掉了 CUDA_SOURCES += vectorAddition.cu,它用来指定要交给 NVCC 编译的 .cu 文件。

这是我的 .pro 文件,在我这里可以正常工作(Qt 5.2 + MSVC2012):

#-------------------------------------------------

# Project created by QtCreator 

#-------------------------------------------------


# qmake project for a console Qt application (QtCore only) whose CUDA sources are
# compiled by nvcc through a qmake "extra compiler" and linked into the app.
TEMPLATE = app
CONFIG  += console
CONFIG  -= app_bundle

QT      += core
QT      -= gui

SOURCES += main.cpp

INCLUDEPATH += $$PWD/
DEPENDPATH  += $$PWD/

# Output directories
DESTDIR          = debug
OBJECTS_DIR      = debug/obj      # directory where .obj files will be saved
CUDA_OBJECTS_DIR = debug/obj      # directory where the .obj of each cuda file will be saved

# This makes the .cu files appear in your project (it does not compile them)
OTHER_FILES += vectorAddition.cu

# Files to be compiled by nvcc; the extra compiler below reads this variable
CUDA_SOURCES += vectorAddition.cu

# CUDA settings <-- may change depending on your system
# Paths are written with forward slashes so that no backslash can be read as an escape.
CUDA_SDK = "C:/ProgramData/NVIDIA Corporation/NVIDIA GPU Computing SDK 4.0/C"   # Path to cuda SDK install
CUDA_DIR = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v6.5"            # Path to cuda toolkit install
SYSTEM_NAME = Win32         # Depending on your system either 'Win32', 'x64', or 'Win64'
SYSTEM_TYPE = 32            # '32' or '64', depending on your system
CUDA_ARCH = sm_20           # Type of CUDA architecture, for example 'compute_10', 'compute_11', 'sm_10'
NVCC_OPTIONS += --use_fast_math

# include paths
INCLUDEPATH += $$CUDA_DIR/include \
               $$CUDA_SDK/common/inc \
               $$CUDA_SDK/../shared/inc

# library directories
# These must be plain directory names: the -I"..." form produced by $$join() is an
# include flag for the nvcc command line and is not a valid library directory.
QMAKE_LIBDIR += $$CUDA_DIR/lib/$$SYSTEM_NAME \
                $$CUDA_SDK/common/lib/$$SYSTEM_NAME \
                $$CUDA_SDK/../shared/lib/$$SYSTEM_NAME

# Add the necessary libraries
CUDA_LIBS = -lcuda -lcudart
LIBS += $$CUDA_LIBS

# Render every include directory as -I"<dir>" for nvcc; the quotation marks are
# needed for directories that contain spaces (Windows required)
CUDA_INC += $$join(INCLUDEPATH,'" -I"','-I"','"')

# nvcc config
# MSVCRT link option (static or dynamic, it must be the same with your Qt SDK link option)
MSVCRT_LINK_FLAG_DEBUG   = "/MDd"
MSVCRT_LINK_FLAG_RELEASE = "/MD"

# Each branch defines one extra compiler: .input names the VARIABLE that lists the
# source files, .output is the object produced per file, .commands is the nvcc
# command line. ${QMAKE_FILE_*} placeholders are filled in per input file.
CONFIG(debug, debug|release) {
    # Debug settings
    cuda_d.input    = CUDA_SOURCES
    cuda_d.output   = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.obj
    cuda_d.commands = $$CUDA_DIR/bin/nvcc.exe -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$LIBS \
                      --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH \
                      --compile -cudart static -g -DWIN32 -D_MBCS \
                      -Xcompiler "/wd4819,/EHsc,/W3,/nologo,/Od,/Zi,/RTC1" \
                      -Xcompiler $$MSVCRT_LINK_FLAG_DEBUG \
                      -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
    cuda_d.dependency_type = TYPE_C
    QMAKE_EXTRA_COMPILERS += cuda_d
}
else {
    # Release settings
    cuda.input    = CUDA_SOURCES
    cuda.output   = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.obj
    cuda.commands = $$CUDA_DIR/bin/nvcc.exe $$NVCC_OPTIONS $$CUDA_INC $$LIBS \
                    --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH \
                    --compile -cudart static -DWIN32 -D_MBCS \
                    -Xcompiler "/wd4819,/EHsc,/W3,/nologo,/O2,/Zi" \
                    -Xcompiler $$MSVCRT_LINK_FLAG_RELEASE \
                    -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
    cuda.dependency_type = TYPE_C
    QMAKE_EXTRA_COMPILERS += cuda
}