我在同时使用 CUDA 和 Visual Studio 时遇到了一些问题:运行 CUDA 调试器时,它执行的是最后一次成功构建的代码;而当我尝试重新构建项目时,输出为:"...\main.cu" exited with code 255
错误列表,在输出选项卡中,基本上是这个错误:ptxas fatal : Unresolved extern function '_ZN7Vector4plERKS_'
我已经查看了其他类似的问题,也用 Google 在不同的网站上搜索过,并尝试把"生成可重定位设备代码"(Generate Relocatable Device Code,即 -rdc)选项设置为开启,但仍然得到同样的错误,只是不再由 ptxas 报出,而是变成了下面这条:
Undefined reference to '_ZN7Vector4plERKS_' in 'x64/Debug/main.cu.obj'
另外,我使用的是VS2015并且只是为了确定:我应该首先构建项目并使用CUDA调试器运行它? "本地Windows调试器"按钮不应该被使用,对吧?
无论如何,这是我的代码:
Vector.cuh
#pragma once
// CUDA_CALLABLE_MEMBER marks a member function as callable from both host and
// device code. It expands to `__host__ __device__` when __CUDACC__ is defined
// and to nothing otherwise, so the declarations below still parse when
// __CUDACC__ is not defined.
#ifdef __CUDACC__
#define CUDA_CALLABLE_MEMBER __host__ __device__
#else
#define CUDA_CALLABLE_MEMBER
#endif
#include <iostream>
// Four-component float vector (x, y, z, w). Every member function is declared
// with CUDA_CALLABLE_MEMBER, i.e. as __host__ __device__ when built by nvcc.
//
// NOTE(review): the destructor is virtual, so objects carry a vtable pointer.
// main.cu copies Vector4 objects between host and device with raw cudaMemcpy;
// a vtable pointer written on the host is not valid on the device. The kernel
// shown makes no virtual calls, but consider dropping `virtual` if the class
// is not meant to be a polymorphic base - confirm with the rest of the project.
class Vector4
{
public:
// The four components, stored as public data members.
float x, y, z, w;
// Default constructor (definition not visible in this excerpt).
CUDA_CALLABLE_MEMBER Vector4();
// Constructs a vector from its four components.
CUDA_CALLABLE_MEMBER Vector4(float x, float y, float z, float w);
CUDA_CALLABLE_MEMBER virtual ~Vector4();
// Presumably writes the components to standard output - definition not
// visible here; verify in Vector.cu.
CUDA_CALLABLE_MEMBER void print();
// Returns the component-wise sum of *this and other as a new Vector4.
CUDA_CALLABLE_MEMBER Vector4 operator+(const Vector4& other);
// Presumably adds *other into *this in place - definition not visible here;
// verify in Vector.cu.
CUDA_CALLABLE_MEMBER void add(Vector4* other);
};
Vector.cu的一部分
// Constructs a vector from its four components.
//
// The execution-space qualifiers are spelled out on the definition so that it
// matches the __host__ __device__ declaration in Vector.cuh: nvcc then emits
// device code for this constructor without relying on (or warning about)
// qualifiers inherited from the in-class declaration. operator+ constructs its
// result through this constructor, including when it runs inside a kernel.
__host__ __device__ Vector4::Vector4(float x, float y, float z, float w)
    : x(x), y(y), z(z), w(w)  // member(parameter): the parameters shadow the members
{
}
// Returns the component-wise sum of *this and `other` as a new Vector4;
// neither operand is modified.
//
// The definition is explicitly __host__ __device__ to match the declaration in
// Vector.cuh, so a device version of this operator is generated. Because the
// calling kernel lives in a different translation unit (main.cu), the project
// must additionally be built with relocatable device code (-rdc=true) and
// device linking so the kernel can resolve this symbol.
__host__ __device__ Vector4 Vector4::operator+(const Vector4& other)
{
    return Vector4(
        this->x + other.x,
        this->y + other.y,
        this->z + other.z,
        this->w + other.w
    );
}
main.cu
#include <cstdio>
#include <cstdlib>
#include <iostream>

#include <cuda.h>
#include "cuda_runtime.h"

#include <SFML/Graphics.hpp>

#include "Vector.cuh"
// Kernel: overwrites *a with the result of (*a) + (*b), using
// Vector4::operator+. It does no thread indexing, so every launched thread
// performs the same update on the same two objects.
__global__ void addVector(Vector4* a, Vector4* b)
{
    Vector4& target = *a;
    const Vector4& addend = *b;
    target = target + addend;
}
// Prints a diagnostic naming the failing call and terminates the process when
// a CUDA runtime call did not return cudaSuccess.
static void checkCuda(cudaError_t status, const char* expr, const char* file, int line)
{
    if (status != cudaSuccess)
    {
        std::fprintf(stderr, "CUDA error at %s:%d: %s failed: %s\n",
                     file, line, expr, cudaGetErrorString(status));
        std::exit(EXIT_FAILURE);
    }
}

// Wraps a CUDA runtime call so its return code is always inspected.
#define CHECK_CUDA(call) checkCuda((call), #call, __FILE__, __LINE__)

// Opens an SFML window and, once per frame, uploads two Vector4 values to the
// device, runs addVector on them, downloads the results, prints them and
// redraws a green circle. Returns 0 after the window is closed and a key is
// read from stdin; exits with EXIT_FAILURE if any CUDA call fails.
int main()
{
    sf::RenderWindow window(sf::VideoMode(200, 200), "SFML works!");
    sf::CircleShape shape(100.f);
    shape.setFillColor(sf::Color::Green);

    // sizeof yields size_t, which is also what the CUDA memory API expects.
    const size_t size = sizeof(Vector4);

    Vector4 v(1, 0, 0, 0);
    Vector4 b(1, 1, 0, 0);
    Vector4* d_v = nullptr;
    Vector4* d_b = nullptr;

    // Allocate the device-side copies once, outside the frame loop.
    CHECK_CUDA(cudaMalloc(&d_v, size));
    CHECK_CUDA(cudaMalloc(&d_b, size));

    while (window.isOpen())
    {
        sf::Event event;
        while (window.pollEvent(event))
        {
            if (event.type == sf::Event::Closed)
                window.close();
        }

        // Upload the current host values to the device.
        CHECK_CUDA(cudaMemcpy(d_v, &v, size, cudaMemcpyHostToDevice));
        CHECK_CUDA(cudaMemcpy(d_b, &b, size, cudaMemcpyHostToDevice));

        // Launch the kernel on the freshly uploaded data. A launch does not
        // return an error code, so launch failures are picked up right after
        // it with cudaGetLastError().
        addVector<<<1, 1>>>(d_v, d_b);
        CHECK_CUDA(cudaGetLastError());

        // Blocking copies back to the host; these wait for the kernel to
        // finish and report any error raised while it was running.
        CHECK_CUDA(cudaMemcpy(&v, d_v, size, cudaMemcpyDeviceToHost));
        CHECK_CUDA(cudaMemcpy(&b, d_b, size, cudaMemcpyDeviceToHost));

        v.print();
        b.print();
        printf("\n\n");

        window.clear();
        window.draw(shape);
        window.display();
    }

    // Release the device allocations.
    CHECK_CUDA(cudaFree(d_v));
    CHECK_CUDA(cudaFree(d_b));

    // Keep the console open until a key is pressed.
    getchar();
    return 0;
}
此处还有项目设置中CUDA C ++下的命令行代码:
set CUDAFE_FLAGS=--sdk_dir "C:\Program Files (x86)\Windows Kits\8.1\"
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin\nvcc.exe" --use-local-env --cl-version 2015 -ccbin "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\x86_amd64" -G --keep-dir x64\Debug -maxrregcount=0 --machine 64 --compile -cudart static -g -Xcompiler "/EHsc /nologo /FS /Zi " -o x64\Debug\%(Filename)%(Extension).obj "%(FullPath)"
抱歉贴了这么长的一大段文字,谢谢!
修改 我使用的是CUDA 8.0
答案 0 :(得分:1)
这段代码本来就不应该能够编译通过。
在我能看到的任何地方,都没有 `_ZN7Vector4plERKS_`(即 `__device__ Vector4::operator+(Vector4 const&)`)的 `__device__` 定义。
修复这一点之后,您还需要启用设备代码的单独编译和链接(relocatable device code),或者把 Vector.cu 直接包含(#include)到 main.cu 中,因为该运算符的设备代码与调用它的内核并不在同一个翻译单元中定义。