Question

我在尝试将Qt与CUDA集成时遇到了问题。我在安装了64位CUDA工具包的64Bit Mac上运行，但是当我尝试构建我的代码时，会抛出错误ld: file not found: @rpath/CUDA.framework/Versions/A/CUDA for architecture x86_64。

我已经验证了所有路径，但同样的错误一直被抛出。我的.pro配置代码如下：

# Qt modules used by the application
QT       += core gui
QT       += multimedia
QT       += multimediawidgets
QT       += concurrent

# Qt 5 and later keep the widget classes in a separate module
greaterThan(QT_MAJOR_VERSION, 4): QT += widgets

# Build an application executable named WebcamFilter
TARGET = WebcamFilter
TEMPLATE = app

# C++ sources, headers and Designer forms handled by the normal Qt toolchain
SOURCES += main.cpp\
           mainwindow.cpp \
           camerafeed.cpp \

HEADERS  += mainwindow.h \
            camerafeed.h

FORMS    += mainwindow.ui

# CUDA Resources: .cu files are compiled by the extra "cuda" compiler defined below
CUDA_SOURCES += gaussian.cu
CUDA_DIR      = /usr/local/cuda
# Path to header and lib files
INCLUDEPATH  += $$CUDA_DIR/include
QMAKE_LIBDIR += $$CUDA_DIR/lib
# Libs used for source code
# NOTE(review): no rpath for $$CUDA_DIR/lib is passed to the linker anywhere in
# this file; possibly related to the reported "@rpath/CUDA.framework" error - confirm.
LIBS         += -lcudart -lcuda
# GPU Architecture
CUDA_ARCH     = sm_20
# Custom flags for nvcc
NVCCFLAGS     = --compiler-options -fno-strict-aliasing -use_fast_math --ptxas-options=-v
# Prepare extra compiler configuration
# CUDA_INC turns every INCLUDEPATH entry into a -I<path> option for nvcc
CUDA_INC      = $$join(INCLUDEPATH,' -I','-I',' ')
# Compile one .cu file into an object file; the trailing sed filter rewrites
# "(<digits>)" in nvcc's output to ":<digits>" and sends it to stderr
cuda.commands = $$CUDA_DIR/bin/nvcc -m64 -O3 -arch=$$CUDA_ARCH -c $$NVCCFLAGS \
                $$CUDA_INC $$LIBS  ${QMAKE_FILE_NAME} -o ${QMAKE_FILE_OUT} \
                2>&1 | sed -r \"s/\\(([0-9]+)\\)/:\\1/g\" 1>&2
# Dependency tracking: nvcc -M lists the files each .cu file depends on
cuda.dependency_type = TYPE_C
cuda.depend_command  = $$CUDA_DIR/bin/nvcc -O3 -M $$CUDA_INC $$NVCCFLAGS   ${QMAKE_FILE_NAME}

# Inputs come from CUDA_SOURCES; each produces <base>_cuda.o in OBJECTS_DIR
cuda.input = CUDA_SOURCES
cuda.output = ${OBJECTS_DIR}${QMAKE_FILE_BASE}_cuda.o
# Tell Qt that we want add more stuff to the Makefile
QMAKE_EXTRA_COMPILERS += cuda

Answer 1

几个月前我遇到过完全相同的问题（修复之后又遇到了另外几个问题），所以既然现在已经全部弄清楚了，我想在这里发布一个完全可以正常工作的 Qt/CUDA 示例。.pro 文件的大部分内容取自我的一个较大的项目，该项目同时支持 Linux 和 Mac（CUDA 相关内容放在 gpu 文件夹中），但下面这段代码只在 OS X 上测试过。

我目前正在使用：

CUDA 7.0驱动程序V7.0.27
OS X Yosemite 10.10.3
QT 5.3.1

如果您最近没有更新，请确保在尝试此代码之前，CUDA deviceQuery 和 bandwidthTest 示例仍然有效。

下面的 .pro 文件可能就是您解决问题所需的全部内容，但 C++ 代码也一并附在下面。大部分解释都写在代码注释里。

qtcuda.pro

#-------------------------------------------------
#
# Project created by QtCreator 2015-05-02T02:37:39
#
#-------------------------------------------------

# Qt modules used by the application
QT       += core gui

# Qt 5 and later keep the widget classes in a separate module
greaterThan(QT_MAJOR_VERSION, 4): QT += widgets

# Build an application executable named qtcuda
TARGET = qtcuda
TEMPLATE = app

# project build directories (if not using shadow build)
# DESTDIR is whatever directory `pwd` prints when qmake runs
DESTDIR     = $$system(pwd)
BUILDDIR    = $$DESTDIR/build

MOC_DIR     = $$BUILDDIR # generated moc_... files
UI_DIR      = $$BUILDDIR # generated ui_mainwindow.h (included by mainwindow.cpp)

OBJECTS_DIR = $$BUILDDIR/bin # .o object files (the CUDA objects are written here too)


# C++ sources, headers and Designer forms handled by the normal Qt toolchain
SOURCES += main.cpp\
        mainwindow.cpp

HEADERS  += mainwindow.h

FORMS    += mainwindow.ui

# NOTE: C++ flags are needed here for
#       the CUDA Thrust library
############### UNIX FLAGS #####################
unix {
    # applies to every unix-like platform, including Mac
    QMAKE_CXXFLAGS += -std=c++11
}
############### MAC FLAGS #####################
macx {
    # libs that don't get passed to nvcc (they are removed again when the
    # CUDA section builds CUDA_LIBS from LIBS)
    NON_CUDA_LIBS += -stdlib=libc++
    LIBS += $$NON_CUDA_LIBS

    # build and link against libc++ with a 10.7 deployment target
    QMAKE_CXXFLAGS += -stdlib=libc++ -mmacosx-version-min=10.7
    QMAKE_LFLAGS += -mmacosx-version-min=10.7
    QMAKE_MACOSX_DEPLOYMENT_TARGET = 10.7

    # specific to computers without older sdks
    # The path must be a single token with no embedded whitespace; otherwise
    # qmake splits the value and the exists() test below can never succeed.
    MAC_SDK  = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk/
    if( exists( $$MAC_SDK) ) {
        QMAKE_MAC_SDK = macosx10.9 # lowest sdk on my computer :/
    }

    # don't show warnings for c++11 extensions
    QMAKE_CXXFLAGS += -Wno-c++11-extensions
}


################### CUDA ###################### (similar to your setup)

# SED_STUFF is appended to the nvcc command line below. It merges stderr into
# stdout, rewrites every "(<digits>)" in nvcc's output to ":<digits>", and
# sends the result back to stderr.
unix:!macx {
    # non-Mac unix: extended regular expressions are enabled with "sed -r"
    SED_STUFF = 2>&1 | sed -r \"s/\\(([0-9]+)\\)/:\\1/g\" 1>&2
}
macx {

    # Mac: same filter with "sed -E" (presumably the sed shipped with OS X does not accept -r; confirm)
    SED_STUFF = 2>&1 | sed -E \"s/\\(([0-9]+)\\)/:\\1/g\" 1>&2
}

# Root of the CUDA toolkit installation
CUDA_DIR = /usr/local/cuda

# make sure cuda is available on the computer
if ( exists( $$CUDA_DIR/ ) ) {
    message( "Configuring for cuda...");
    DEFINES += CUDA_7 # same as putting "#define CUDA_7" in the code

    # Cuda sources (compiled by the extra "cuda" compiler defined below)
    CUDA_SOURCES += cuda/wrappers.cu

    # show files in working tree
    OTHER_FILES +=  cuda/wrappers.cu \
                    cuda/wrappers.cuh \
                    cuda/helper_cuda.h

    # Path to the cuda libraries
    CUDA_LIB = $$CUDA_DIR/lib

    # Path to header and lib files
    INCLUDEPATH += $$CUDA_DIR/include \
                   cuda # my cuda files
    QMAKE_LIBDIR += $$CUDA_LIB

    # prevents warnings from code we didn't write
    QMAKE_CXXFLAGS += -isystem $$CUDA_DIR/include

    LIBS += -lcudart # add other cuda libs here (-lcublas -lcurand, etc.)

    # SPECIFY THE R PATH FOR NVCC!!!!! (your problem...previously my problem)
    # The rpath is given both to the application link step and to nvcc's linker.
    QMAKE_LFLAGS += -Wl,-rpath,$$CUDA_LIB
    NVCCFLAGS = -Xlinker -rpath,$$CUDA_LIB

    # libs used in the code
    CUDA_LIBS = $$LIBS
    CUDA_LIBS -= $$NON_CUDA_LIBS # remove libs nvcc won't recognize

    # GPU architecture (might be a way to detect this somehow instead of hardcoding)
    CUDA_ARCH     = sm_20 # <- based on specs from your code. This was tested with sm_30

    # Some default NVCC flags (appended to the rpath flags set above)
    NVCCFLAGS     += --compiler-options -fno-strict-aliasing -use_fast_math --ptxas-options=-v --std=c++11

    # Prepare the extra compiler configuration (taken from the nvidia forum)
    # CUDA_INC turns every INCLUDEPATH entry into a -I<path> option for nvcc
    CUDA_INC = $$join(INCLUDEPATH,' -I','-I',' ')

    # Compile one .cu file into an object file and filter nvcc's output through SED_STUFF
    cuda.commands = $$CUDA_DIR/bin/nvcc -m64 -O3 -arch=$$CUDA_ARCH -c $$NVCCFLAGS \
                    $$CUDA_INC $$CUDA_LIBS  ${QMAKE_FILE_NAME} -o ${QMAKE_FILE_OUT} \
                    $$SED_STUFF
    # nvcc error printout format ever so slightly different from gcc,
    # which is why its output is rewritten by the sed filter
    # http://forums.nvidia.com/index.php?showtopic=171651

    # Dependency tracking: nvcc -M lists the files each .cu file depends on
    cuda.dependency_type = TYPE_C
    cuda.depend_command = $$CUDA_DIR/bin/nvcc -O3 -M $$CUDA_INC $$NVCCFLAGS   ${QMAKE_FILE_NAME}

    # Inputs come from CUDA_SOURCES; each produces <base>_cuda.o in OBJECTS_DIR
    cuda.input = CUDA_SOURCES
    cuda.output = ${OBJECTS_DIR}${QMAKE_FILE_BASE}_cuda.o

    # Tell Qt that we want add more stuff to the Makefile
    QMAKE_EXTRA_COMPILERS += cuda

} # endif CUDA

以下两个文件由用于执行CUDA代码的外部函数组成。 .cu文件定义包含CUDA代码的函数，并使用NVCC进行编译（如.pro文件中所指定）。 .cuh文件用作头文件，只是声明相同的函数，以便C ++文件可以引用它们。只有wrappers.cuh需要包含在C ++代码中。

注意：所引用的 helper_cuda.h 文件随 NVIDIA CUDA Samples 一起提供（原回答在此处附有该文件的链接）。

注意：此项目假设 wrappers.cuh、wrappers.cu 和 helper_cuda.h 保存在项目目录下名为 cuda 的文件夹中。

cuda/wrappers.cuh

#ifndef WRAPPERS_CUH
#define WRAPPERS_CUH

#include <stddef.h> // size_t

typedef unsigned int uint;

// C-linkage entry points implemented in wrappers.cu (compiled by nvcc).
// C++ code only needs to include this header to call into the CUDA side.
extern "C"
{
    // Selects the CUDA device; the process exits if no usable device is found.
    void cudaInit();

    // Allocates `size` bytes of device memory and stores the pointer in *devPtr.
    // `size` is size_t so this declaration matches the definition in
    // wrappers.cu exactly (a narrower type here would be an argument mismatch).
    void allocateArray(void **devPtr, size_t size);

    // Releases device memory obtained from allocateArray.
    void freeArray(void *devPtr);

    // Copies `size` bytes from `host` to `device`, starting `offset` bytes into `device`.
    void copyArrayToDevice(void *device, const void *host, int offset, int size);

    // Copies `size` bytes from `device` back to `host`.
    void copyArrayFromDevice(void *host, const void *device, int size);

    // Returns the sum of the first `n` elements of the device array `dNumbers`.
    uint sumNumbers(uint *dNumbers, uint n);

    // not used here but useful when calling kernel functions:
    // writes the thread count per block (at most blockSize) to numThreads and
    // the number of blocks needed to cover n elements to numBlocks
    void computeGridSize(uint n, uint blockSize, uint &numBlocks, uint &numThreads);
}

#endif // WRAPPERS_CUH

cuda/wrappers.cu

#include <cuda_runtime.h>
#include <cuda_gl_interop.h>

#include <thrust/device_ptr.h>
#include <thrust/reduce.h>

#include "helper_cuda.h"

typedef unsigned int uint;

extern "C"
{
    // Picks the CUDA device to use via findCudaDevice() (from helper_cuda.h,
    // not shown here; per the original author it chooses the device with the
    // highest Gflops/s). If no device is returned, the process terminates.
    void cudaInit()
    {
        const int devID = findCudaDevice();

        if (devID < 0)
        {
            // Not finding a device is a failure: report it on stderr and exit
            // with a non-zero status so callers and scripts can detect it.
            fprintf(stderr, "No CUDA Capable devices found, exiting...\n");
            exit(EXIT_FAILURE);
        }
    }
    // Allocates `size` bytes on the device with cudaMalloc and stores the
    // resulting device pointer in *devPtr. The cudaMalloc result is handed to
    // checkCudaErrors (from helper_cuda.h, not shown here) for error handling.
    void allocateArray(void **devPtr, size_t size)
    {
        checkCudaErrors(cudaMalloc(devPtr, size));
    }

    // Releases the device allocation `devPtr` with cudaFree; the result is
    // handed to checkCudaErrors (from helper_cuda.h, not shown here).
    void freeArray(void *devPtr)
    {
        checkCudaErrors(cudaFree(devPtr));
    }

    // Copies `size` bytes from host memory `host` into device memory, starting
    // `offset` bytes past `device` (the pointer is advanced as a char*).
    // The cudaMemcpy result is handed to checkCudaErrors.
    void copyArrayToDevice(void *device, const void *host, int offset, int size)
    {
        checkCudaErrors(cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice));
    }

    // Copies `size` bytes from device memory `device` into host memory `host`.
    // The cudaMemcpy result is handed to checkCudaErrors.
    void copyArrayFromDevice(void *host, const void *device, int size)
    {
        checkCudaErrors(cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost));
    }


    // Returns the sum of the first `n` elements of the device array
    // `dNumbers`. The raw pointer is wrapped in thrust::device_ptr so that
    // Thrust treats the range as device memory when reducing it.
    uint sumNumbers(uint *dNumbers, uint n)
    {
        const thrust::device_ptr<uint> first(dNumbers);
        const thrust::device_ptr<uint> last = first + n;
        return thrust::reduce(first, last);
    }

    // Rounds a / b up to the nearest integer (ceiling division).
    // Returns 0 when b is 0 instead of dividing by zero.
    uint iDivUp(uint a, uint b)
    {
        if (b == 0)
        {
            return 0;
        }
        return (a % b != 0) ? (a / b + 1) : (a / b);
    }

    // Computes the grid and thread block size for a given number of elements.
    //   n          - number of elements to cover
    //   blockSize  - maximum number of threads per block
    //   numBlocks  - out: number of blocks needed to cover n elements
    //   numThreads - out: threads per block, the smaller of blockSize and n
    // When n or blockSize is 0 both outputs are 0 (there is nothing to launch).
    void computeGridSize(uint n, uint blockSize, uint &numBlocks, uint &numThreads)
    {
        numThreads = (blockSize < n) ? blockSize : n;
        // iDivUp tolerates numThreads == 0, so the empty case cannot divide by zero
        numBlocks = iDivUp(n, numThreads);
    }
}

接下来的三个文件创建一个简单的QT窗口并检查鼠标事件。每次移动鼠标时，X和Y像素位置都会相加，以创建 n 。然后使用CUDA函数来查找 1 + 2 + ... + n （是的，这是奇怪和随机的;重点是显示CUDA以快速简便的方式运行）。

因此，如果鼠标位于（23,45），那么：
n =（23 + 45）= 68 和
1 + 2 + ... + n = 2346

然后显示在窗口的底部。

main.cpp

#include "mainwindow.h"
#include <QApplication>

int main(int argc, char *argv[])
{
    QApplication a(argc, argv);
    MainWindow w;
    w.show();

    return a.exec();
}

mainwindow.h

#ifndef MAINWINDOW_H
#define MAINWINDOW_H

#include <QMainWindow>

namespace Ui {
class MainWindow;
}

class MainWindow : public QMainWindow
{
    Q_OBJECT

public:
    explicit MainWindow(QWidget *parent = 0);
    ~MainWindow();

    // events are passed here
    virtual bool eventFilter(QObject *obj, QEvent *event);

private:
    Ui::MainWindow *ui;

    uint *m_dNumbers; // device array
};

#endif // MAINWINDOW_H

mainwindow.cpp

#include "mainwindow.h"
#include "ui_mainwindow.h"

#include <assert.h>

#include <algorithm>

#include <QEvent>
#include <QMouseEvent>

#include "wrappers.cuh"

const uint MAX_NUMBERS = 5000;

// Sets up the UI, installs this window as an application-wide event filter
// and uploads the sequence {1, 2, ..., MAX_NUMBERS} to a device array.
MainWindow::MainWindow(QWidget *parent)
    : QMainWindow(parent)
    , ui(new Ui::MainWindow)
{
    // basic ui setup; the filter lets eventFilter() see mouse movements
    ui->setupUi(this);
    qApp->installEventFilter(this);

    // host-side copy of the values: element i holds i + 1
    uint hostValues[MAX_NUMBERS];
    uint nextValue = 1;
    for (uint &value : hostValues)
    {
        value = nextValue++;
    }

    // size in bytes of the array on both host and device
    const auto byteCount = MAX_NUMBERS * sizeof(int);

    // CUDA FUNCTIONS: pick the device, allocate the device array, upload the values
    cudaInit();
    allocateArray((void**)&m_dNumbers, byteCount);
    copyArrayToDevice(m_dNumbers, hostValues, 0, byteCount);
}

// Releases the device array allocated in the constructor (through the
// freeArray CUDA wrapper) and then destroys the generated UI object.
MainWindow::~MainWindow()
{
    // CUDA FUNCTION: free device memory
    freeArray(m_dNumbers);
    delete ui;
}

// Event filter used to detect mouse movement events.
// For every MouseMove event it takes n = x + y of the event position (clamped
// to [0, MAX_NUMBERS]), asks the CUDA wrapper for the sum of the first n
// numbers of the device array, and shows the result in the status bar.
// Always returns false, so the event continues to its normal target.
bool MainWindow::eventFilter(QObject *, QEvent *event)
{
    if (event->type() == QEvent::MouseMove)
    {
        // find mouseX + mouseY
        const QMouseEvent *mouseEvent = static_cast<QMouseEvent*>(event);
        const QPoint p = mouseEvent->pos();

        // A negative coordinate sum must clamp to 0; converting it straight
        // to uint would wrap around to a huge value and select MAX_NUMBERS.
        const int coordSum = p.x() + p.y();
        const uint n = (coordSum > 0)
                           ? std::min(static_cast<uint>(coordSum), MAX_NUMBERS)
                           : 0u;

        // CUDA FUNCTION:
        // compute the sum of 1 + 2 + 3 + ... + n
        const uint sum = sumNumbers(m_dNumbers, n);

        // check that the sum is correct
        assert(sum == ( (n * (n+1) ) / 2 ) );

        // show the sum at the bottom of the window
        statusBar()->showMessage(QString("Mouse pos: (%1, %2)    Sum from 0 to %3 = %4")
                                     .arg(p.x()).arg(p.y()).arg(n).arg(sum));
    }
    return false;
}

如果您想要实际构建和运行项目，最后但并非最不重要的是.ui文件：

mainwindow.ui

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Qt Designer form for the MainWindow class: a 400x300 QMainWindow with an
  empty central widget, a menu bar, a tool bar in the top tool bar area and a
  status bar (named statusBar).
-->
<ui version="4.0">
 <class>MainWindow</class>
 <widget class="QMainWindow" name="MainWindow">
  <property name="geometry">
   <rect>
    <x>0</x>
    <y>0</y>
    <width>400</width>
    <height>300</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>MainWindow</string>
  </property>
  <widget class="QWidget" name="centralWidget"/>
  <widget class="QMenuBar" name="menuBar">
   <property name="geometry">
    <rect>
     <x>0</x>
     <y>0</y>
     <width>400</width>
     <height>22</height>
    </rect>
   </property>
  </widget>
  <widget class="QToolBar" name="mainToolBar">
   <attribute name="toolBarArea">
    <enum>TopToolBarArea</enum>
   </attribute>
   <attribute name="toolBarBreak">
    <bool>false</bool>
   </attribute>
  </widget>
  <widget class="QStatusBar" name="statusBar"/>
 </widget>
 <layoutdefault spacing="6" margin="11"/>
 <resources/>
 <connections/>
</ui>

我知道QT / CUDA过程可能很烦人，而且自从你提出这个问题以来已经过了半年的沉默，但希望这会有所帮助。

Qt：找不到文件：@rpath/CUDA.framework/Versions/A/CUDA for architecture x86_64

1 个答案: