我发现,在 Visual Studio 2012 中编译 Thrust 程序时,编译器无法识别 CUDA 的 atomicOr 函数。
我读到过,使用 NVIDIA 的 nvcc 编译器时,所有必需的头文件都会被自动包含。关于这个问题的大多数帖子都指出,出现这种错误通常意味着目标架构设置不正确。
我根据其他帖子尝试了这些设置: How to set CUDA compiler flags in Visual Studio 2010?
......以及使用: http://s1240.photobucket.com/user/fireshot8888/media/cuda_settings.png.html
main.cpp中:
#include <thrust/device_vector.h>
#include <cstdlib>
#include <iostream>
#include "cuda.h"
using namespace std;
//Visual C++ compiled main function to launch the GPU calling code
int main(int argc, char *argv[])
{
    // Hand-keyed sample values (two records of ten variables each), kept
    // small so the example stays complete without being complicated.
    float data[] = {1.2, 3.4, 3.4, 3.3, 4.4, 4.4, 4.4, 3.4, 4.4, 4.4,
                    1.2, 3.4, 3.4, 3.3, 4.4, 4.4, 4.4, 3.4, 4.4, 4.4};
    const int sampleCount = sizeof(data) / sizeof(data[0]);

    // Shape of the sample set and the number of bins requested per variable.
    const int numVars = 10;
    int numBins = 4;
    int rowCount = 2;

    // Host-side container holding the samples that are handed to the GPU code.
    thrust::host_vector<float> h_data(data, data + sampleCount);

    doHistogramGPU(numVars, h_data, numBins, rowCount);
    return 0;
}
cuda.cu:
#include "cuda.h"
#include <iostream>
#include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h>
//I GAVE THIS A TRY BUT IT DID NOT FIX MY ISSUE::::
#include <cuda_runtime.h>
#include <cuda.h>
using namespace std;
//Function to call the kernel
// Bins every value of h_buffer on the GPU and records each (bin, value)
// hit as a flag in a device-side bit-vector.
//
// Parameters:
//   numVars    - number of variables (columns) per record. It selects the
//                per-column min/max entry, so it must lie between 1 and the
//                length of the hard-coded range tables below.
//   h_buffer   - host copy of the samples (taken by value, as declared in cuda.h).
//   numBins    - number of bins per variable; must be at least 1.
//   numRecords - number of records (rows) held in h_buffer.
//
// The bit-vector lives in device memory only; nothing is copied back or
// returned to the caller.
void doHistogramGPU(int numVars, thrust::host_vector<float> h_buffer, int numBins, int numRecords)
{
    // Per-column value ranges used to normalise a sample before binning.
    float minValues[] = {579.8, 72.16, 0.000385, 7.576e-005, 6.954e-005, 0, 0, 2.602e-012, 1.946e-013, 7.393e-015};
    float maxValues[] = {1053, 22150, 0.7599, 0.7596, 0.24, 0.2398, 0.1623, 1.167e-007, 4.518e-006, 5.322e-008};
    const int rangeCount = sizeof(minValues) / sizeof(minValues[0]);

    // BinFinder indexes the range tables with (id % numVars): a numVars larger
    // than the tables would read past their end on the device, and zero would
    // divide by zero. Refuse such input instead of launching.
    if (numVars < 1 || numVars > rangeCount || numBins < 1)
    {
        cerr << "doHistogramGPU: numVars must be in [1, " << rangeCount
             << "] and numBins must be positive" << endl;
        return;
    }

    // Number of flags stored per BYTE_UNIT word. BinFinder derives the same
    // value from sizeof(BYTE_UNIT), so the two must stay in sync.
    // NOTE(review): sizeof yields bytes, not bits, so only sizeof(BYTE_UNIT)
    // bits of every word are ever used -- confirm whether bits were intended.
    const int dataSize = sizeof(BYTE_UNIT);

    thrust::device_vector<float> d_buffer(h_buffer.begin(), h_buffer.end());

    // Words per bin. Deliberately the same expression BinFinder evaluates on
    // the device, so the allocation always matches the kernel's indexing.
    const int bitVectorSize = ceil(numRecords * numVars / (float)dataSize);
    thrust::device_vector<BYTE_UNIT> d_bitData(bitVectorSize * numBins);

    // Pair every sample with its running index so the functor knows which
    // column and which flag position the sample belongs to.
    thrust::counting_iterator<int> counter(0);
    auto zipInFirst = thrust::make_zip_iterator(thrust::make_tuple(d_buffer.begin(), counter));
    auto zipInLast = thrust::make_zip_iterator(thrust::make_tuple(d_buffer.end(), counter + d_buffer.size()));

    // Get the range tables loaded onto the device.
    thrust::device_vector<float> d_minValues(minValues, minValues + rangeCount);
    thrust::device_vector<float> d_maxValues(maxValues, maxValues + rangeCount);

    // Invoke the Thrust kernel; the functor works on raw device pointers.
    thrust::for_each(zipInFirst, zipInLast,
                     BinFinder(thrust::raw_pointer_cast(d_bitData.data()),
                               thrust::raw_pointer_cast(d_minValues.data()),
                               thrust::raw_pointer_cast(d_maxValues.data()),
                               numVars, numBins, numRecords));
    cout << endl;
    return;
}
cuda.h:
#ifndef CUDA_H
#define CUDA_H
#include <thrust/device_vector.h>
#include <iostream>
//I tried these here, too...
#include <cuda_runtime.h>
#include <cuda.h>
using namespace std;
// Storage word for the bit-vector (32 bit storage). It is unsigned int rather
// than long because atomicOr is only overloaded for int, unsigned int and
// unsigned long long, so a long* argument cannot be resolved.
typedef unsigned int BYTE_UNIT;
void doHistogramGPU(int numvars, thrust::host_vector<float> h_buffer, int numBins, int numRecords);

// Thrust functor that maps one (value, index) tuple to a bin and sets the
// corresponding flag in a device-side bit-vector.
//
// The functor only stores raw device pointers and sizes; it does not own any
// of the memory it points to.
struct BinFinder
{
    BYTE_UNIT * data;      // bit-vector words, laid out bin after bin
    float * rawMinVector;  // per-column minimum, indexed by (id % numVars)
    float * rawMaxVector;  // per-column maximum, indexed by (id % numVars)
    int numVars;
    int numBins;
    int numRecords;

    BinFinder(BYTE_UNIT * data, float * rawMinVector, float * rawMaxVector, int numVars, int numBins, int numRecords)
        : data(data),
          rawMinVector(rawMinVector),
          rawMaxVector(rawMaxVector),
          numVars(numVars),
          numBins(numBins),
          numRecords(numRecords)
    {
    }

    // This kernel converts the multidimensional bin representation to a
    // single dimensional representation.
    //
    // param is a tuple of (sample value, running sample index).
    // Requires a target architecture that supports 32-bit global atomics
    // (compute capability 1.1 or later).
    template <typename Tuple>
    __device__ void operator()( Tuple param )
    {
        // Flags stored per word; doHistogramGPU sizes the allocation with the
        // same sizeof-based value.
        // NOTE(review): this is a byte count, not a bit count, so only
        // sizeof(BYTE_UNIT) bits of each word are used -- confirm intent.
        const int dataSize = sizeof(BYTE_UNIT);
        const int shiftSize = dataSize - 1;
        const int bitVectorSize = ceil(numRecords * numVars / float(dataSize));

        const float value = thrust::get<0>(param);
        const int id = thrust::get<1>(param);

        // Look up the min and max values for this data column using the index.
        const int column = id % numVars;
        const float lowest = rawMinVector[column];
        const float highest = rawMaxVector[column];

        // Calculate the bin id. Values outside [lowest, highest] are clamped
        // into the first or last bin so they can never index outside the
        // bit-vector; a NaN (e.g. highest == lowest) falls into bin 0 because
        // the comparison below is false for it.
        const float percentage = (value - lowest) / (highest - lowest);
        int bin = 0;
        if (percentage > 0.0f)
        {
            bin = (percentage >= 1.0f) ? numBins - 1 : int(percentage * numBins);
            if (bin >= numBins)
            {
                bin = numBins - 1;
            }
        }

        // Set a 1 in the appropriate bitvector for the calculated bin.
        // Several threads share one word, so a plain |= would race; the
        // update has to be atomic.
        BYTE_UNIT * const word = data + (bin * bitVectorSize + id / dataSize);
        const BYTE_UNIT mask = BYTE_UNIT(1) << (shiftSize - id % dataSize);
        atomicOr(word, mask);
    }
};
#endif
cuda.cu的nvcc命令(包括我的cuda.h文件):
"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v6.0/bin/nvcc.exe" "C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu" -c -o "C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/build/CMakeFiles/CudaLib.dir//Debug/CudaLib_generated_cuda.cu.obj" -ccbin "C:/Program Files (x86)/Microsoft Visual Studio 11.0/VC/bin" -m64 -Xcompiler ,\"/DWIN32\",\"/D_WINDOWS\",\"/W3\",\"/GR\",\"/EHsc\",\"/D_DEBUG\",\"/MDd\",\"/Zi\",\"/Ob0\",\"/Od\",\"/RTC1\" -DNVCC "-IC:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v6.0/include" "-IC:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v6.0/include"
nvcc输出完整错误:
1> nvcc 警告:'compute_10' 和 'sm_10' 架构已弃用,可能会在将来的版本中删除。
1> C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(107): warning: 变量 "minValues" 已声明但从未被引用
1>
1> C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(108): warning: 变量 "maxValues" 已声明但从未被引用
1>
1> C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(462): warning: 变量 "shiftSize" 已声明但从未被引用
1>
1> C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(602): warning: 对非 const 的引用的初始值必须是左值
1>
1> C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(618): warning: 无法访问的代码中的动态初始化
1>
1> C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(522): warning: 变量 "shiftSize" 已声明但从未被引用
1>
1> C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(975): warning: 对非 const 的引用的初始值必须是左值
1>
1> C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(993): warning: 对非 const 的引用的初始值必须是左值
1>
1> C:/Users/datahead8888/Documents/Visual Studio 2012/Projects/thrust-space-data/src/cuda.cu(1022): warning: 变量 "shiftSize" 已声明但从未被引用
1>
1> c:\users\datahead8888\documents\visual studio 2012\projects\thrust-space-data\src\cuda.h(188): error: 标识符 "atomicOr" 未定义
1 GT;检测期间:
1 GT;实例化&#34; void BinFinder :: operator()(Tuple)[with Tuple = thrust :: detail :: tuple_of_iterator_references]&#34;
1 GT; C:\ Program Files \ NVIDIA GPU Computing Toolkit \ CUDA \ v6.0 \ include \ thrust / detail / function.h(119):here
1 GT;实例化&#34;结果thrust :: detail :: device_function :: operator()(const Argument&amp;)const [with Function = BinFinder,Result = void,Argument = thrust :: detail :: tuple_of_iterator_references,int,thrust: :null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type&gt;]&#34;
1 GT; C:\ Program Files \ NVIDIA GPU Computing Toolkit \ CUDA \ v6.0 \ include \ thrust / system / cuda / detail / for_each.inl(82):here
1 GT;实例化&#34; thrust :: system :: cuda :: detail :: for_each_n_detail :: for_each_n_closure :: result_type thrust :: system :: cuda :: detail :: for_each_n_detail :: for_each_n_closure :: operator()()[with RandomAccessIterator = thrust :: zip_iterator&gt ;, thrust :: counting_iterator,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust: :null_type&gt;&gt;,Size = unsigned int,UnaryFunction = BinFinder,Context = thrust :: system :: cuda :: detail :: detail :: blocked_thread_array]&#34;
1 GT; C:\ Program Files \ NVIDIA GPU Computing Toolkit \ CUDA \ v6.0 \ include \ thrust / system / cuda / detail / detail / launch_closure.inl(49):here
1 GT;实例化&#34; void thrust :: system :: cuda :: detail :: detail :: launch_closure_by_value(Closure)[与Closure = thrust :: system :: cuda :: detail :: for_each_n_detail :: for_each_n_closure&gt ;,推力: :counting_iterator,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type&gt;&gt ;, unsigned int,BinFinder ,thrust :: system :: cuda :: detail :: detail :: blocked_thread_array&gt;]&#34;
1 GT; C:\ Program Files \ NVIDIA GPU Computing Toolkit \ CUDA \ v6.0 \ include \ thrust / system / cuda / detail / detail / launch_closure.inl(77):here
1 GT;实例化&#34; thrust :: system :: cuda :: detail :: detail :: closure_launcher_base :: launch_function_t thrust :: system :: cuda :: detail :: detail :: closure_launcher_base :: get_launch_function()[with Closure = thrust :: system :: cuda :: detail :: for_each_n_detail :: for_each_n_closure&gt ;, thrust :: counting_iterator,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust: :null_type,thrust :: null_type,thrust :: null_type&gt;&gt;,unsigned int,BinFinder,thrust :: system :: cuda :: detail :: detail :: blocked_thread_array&gt;,launch_by_value = true]&#34;
1 GT; C:\ Program Files \ NVIDIA GPU Computing Toolkit \ CUDA \ v6.0 \ include \ thrust / system / cuda / detail / detail / launch_closure.inl(185):here
1 GT; [2个实例化上下文未显示]
1 GT;实例化&#34; thrust :: tuple thrust :: system :: cuda :: detail :: for_each_n_detail :: configure_launch(Size)[with Closure = thrust :: system :: cuda :: detail :: for_each_n_detail :: for_each_n_closure&gt; ,thrust :: counting_iterator,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type&gt;&gt ;, unsigned int,BinFinder,thrust :: system :: cuda :: detail :: detail :: blocked_thread_array&gt;,Size = long long]&#34;
1 GT; C:\ Program Files \ NVIDIA GPU Computing Toolkit \ CUDA \ v6.0 \ include \ thrust / system / cuda / detail / for_each.inl(163):here
1 GT;实例化&#34; RandomAccessIterator thrust :: system :: cuda :: detail :: for_each_n(thrust :: system :: cuda :: detail :: execution_policy&amp;,RandomAccessIterator,Size,UnaryFunction)[with DerivedPolicy = thrust :: system :: cuda :: detail :: tag,RandomAccessIterator = thrust :: zip_iterator&gt ;, thrust :: counting_iterator,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type&gt;&gt;,Size = long long,UnaryFunction = BinFinder]&#34;
1 GT; C:\ Program Files \ NVIDIA GPU Computing Toolkit \ CUDA \ v6.0 \ include \ thrust / system / cuda / detail / for_each.inl(191):here
1 GT;实例化&#34; RandomAccessIterator thrust :: system :: cuda :: detail :: for_each(thrust :: system :: cuda :: detail :: execution_policy&amp;,RandomAccessIterator,RandomAccessIterator,UnaryFunction)[with DerivedPolicy = thrust :: system :: cuda :: detail :: tag,RandomAccessIterator = thrust :: zip_iterator&gt ;, thrust :: counting_iterator,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type&gt;&gt;,UnaryFunction = BinFinder]&#34;
1 GT; C:\ Program Files \ NVIDIA GPU Computing Toolkit \ CUDA \ v6.0 \ include \ thrust / detail / for_each.inl(43):here
1 GT;实例化&#34; InputIterator thrust :: for_each(const thrust :: detail :: execution_policy_base&amp;,InputIterator,InputIterator,UnaryFunction)[与DerivedPolicy = thrust :: system :: cuda :: detail :: tag,InputIterator = thrust :: zip_iterator&gt ;, thrust :: counting_iterator,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type&gt; &gt;,UnaryFunction = BinFinder]&#34;
1 GT; C:\ Program Files \ NVIDIA GPU Computing Toolkit \ CUDA \ v6.0 \ include \ thrust / detail / for_each.inl(57):here
1 GT;实例化&#34; InputIterator thrust :: for_each(InputIterator,InputIterator,UnaryFunction)[with InputIterator = thrust :: zip_iterator&gt ;, thrust :: counting_iterator,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust: :null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type,thrust :: null_type&gt;&gt;,UnaryFunction = BinFinder]&#34;
1 GT; C:/ Users / datahead8888 / Documents / Visual Studio 2012 / Projects / thrust-space-data / src / cuda.cu(597):这里
1 GT;
1> 在编译 "C:/Users/DATAHE~1/AppData/Local/Temp/tmpxft_00001f78_00000000-8_cuda.cpp1.ii" 时检测到 1 个错误。
1> cuda.cu
答案 0 :(得分:1)
atomicOr 之所以未定义,是因为您的项目设置没有指定支持原子操作的目标架构(计算能力 cc1.1 或更高版本)。
您需要修改编译操作的设置,以便针对GPU支持的体系结构以及支持原子的体系结构进行编译。
您的编译命令中根本没有架构开关(例如 -arch=sm_20),因此编译时使用的是默认架构(cc1.0)。该架构不支持原子操作,并且在 CUDA 6 中已被弃用,所以编译器才会发出警告,提示您正在为已弃用的架构进行编译。
您需要查阅相关的问题和文档,了解如何设置目标架构,并且必须确保其中不包含 cc1.0,否则编译仍会失败。(例如,在您链接的那个问题 "How to set CUDA compiler flags in Visual Studio 2010?" 中,应使用答案里讨论的方法,而不是问题本身描述的方法,后者不起作用。请通读所有答案,并注意该设置既可以在项目属性中进行,也可以在单个文件的属性中进行。)
如果您在配置这些设置时遇到困难,可以尝试打开一个依赖原子操作的 CUDA 示例项目(例如 simple atomic intrinsics),删除该项目中现有的代码,再把您自己的代码放进去。这样,您的代码就能直接沿用该项目中已经配置好的、支持原子操作的项目设置。