我试图在一台装有 CUDA 7.5 的远程计算机上,使用 Nvidia Tesla M2090 运行 CudaSift 项目中的 cudaSift。
这台机器有 4 个这样的 GPU;根据调试结果,我非常确定初始化已经正确完成。无论如何,初始化代码如下:
void InitCuda(int devNum)
{
int nDevices;
cudaGetDeviceCount(&nDevices);
if (!nDevices) {
std::cerr << "No CUDA devices available" << std::endl;
return;
}
devNum = std::min(nDevices-1, devNum);
deviceInit(devNum);
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, devNum);
printf("Device Number: %d\n", devNum);
printf(" Device name: %s\n", prop.name);
printf(" Memory Clock Rate (MHz): %d\n", prop.memoryClockRate/1000);
printf(" Memory Bus Width (bits): %d\n", prop.memoryBusWidth);
printf(" Peak Memory Bandwidth (GB/s): %.1f\n\n",
2.0*prop.memoryClockRate*(prop.memoryBusWidth/8)/1.0e6);
}
我使用 cmake 生成 Makefile,然后用 make 构建 cudaSift,整个过程没有任何错误。
但是,当我运行它时,会返回以下错误:
safeCall() Runtime API error in file </ghome/rzhengac/Downloads/CudaSift-Maxwell/cudaSiftH.cu>, line 42 : invalid device symbol.
cudaSiftH.cu 的第 42 行是:
// Copies sizeof(int) bytes from the host variable totPts into the device
// symbol d_PointCounter; the call is wrapped in safeCall (defined elsewhere).
safeCall(cudaMemcpyToSymbol(d_PointCounter, &totPts, sizeof(int)));
其中:
// One-element unsigned counter declared with __device__; it is the destination
// symbol of the cudaMemcpyToSymbol call.
__device__ unsigned int d_PointCounter[1];
// Host-side value (zero) that is copied into the device counter.
int totPts = 0;
这是 InitCuda 打印的输出(用于确认初始化期间一切正常):
Device Number: 0
Device name: Tesla M2090
Memory Clock Rate (MHz): 1848
Memory Bus Width (bits): 384
Peak Memory Bandwidth (GB/s): 177.4
SOLUTION:
正如评论中所建议的那样,我之前使用了错误的架构进行编译:Tesla M2090 属于 Fermi 架构(计算能力 2.0),而为 sm_35 编译出的设备代码无法在它上面加载,因此运行时找不到设备符号。我必须将 CMakeLists.txt 中的每个 sm_35 更改为 sm_20,修改后的文件如下:
# Minimum CMake version and project name.
cmake_minimum_required(VERSION 2.6)
project(cudaSift)
# Project version 2.0.0; the three components are forwarded to CPack below.
set(cudaSift_VERSION_MAJOR 2)
set(cudaSift_VERSION_MINOR 0)
set(cudaSift_VERSION_PATCH 0)
set(CPACK_PACKAGE_VERSION_MAJOR "${cudaSift_VERSION_MAJOR}")
set(CPACK_PACKAGE_VERSION_MINOR "${cudaSift_VERSION_MINOR}")
set(CPACK_PACKAGE_VERSION_PATCH "${cudaSift_VERSION_PATCH}")
# Produce ZIP archives when packaging; the CPACK_* variables must be set
# before the CPack module is included.
set(CPACK_GENERATOR "ZIP")
include(CPack)
# OpenCV is mandatory: configuration aborts if it cannot be found.
find_package(OpenCV REQUIRED)
# CUDA is located through the FindCUDA module; it is not marked REQUIRED so
# that a missing toolkit produces the friendlier message below.
find_package(CUDA)
if (NOT CUDA_FOUND)
  message(STATUS "CUDA not found. Project will not be built.")
  # Stop processing this file so the message above is actually true; without
  # this, configuration would continue into the CUDA-specific commands below,
  # which cannot succeed without the toolkit.
  return()
endif(NOT CUDA_FOUND)
# Per-platform compiler flags.
# CUDA_NVCC_FLAGS entries are passed to nvcc; "--compiler-options;-O2" hands
# -O2 through to the host compiler, and -DVERBOSE defines the VERBOSE macro.
if (WIN32)
# Windows: host flags are collected in EXTRA_CXX_FLAGS and applied to the
# target later via COMPILE_FLAGS; device code is built for sm_20.
set(EXTRA_CXX_FLAGS "/DVERBOSE /D_CRT_SECURE_NO_WARNINGS ")
list(APPEND CUDA_NVCC_FLAGS "-arch=sm_20;--compiler-options;-O2;-DVERBOSE")
endif()
if (UNIX)
if (APPLE)
# macOS: same nvcc flags as Windows, with GCC/Clang-style host flags.
set(EXTRA_CXX_FLAGS "-DVERBOSE -msse2")
list(APPEND CUDA_NVCC_FLAGS "-arch=sm_20;--compiler-options;-O2;-DVERBOSE")
else()
# Other UNIX (e.g. Linux): host flags are appended to CMAKE_CXX_FLAGS
# directly and EXTRA_CXX_FLAGS is not set in this branch. No -arch appears in
# CUDA_NVCC_FLAGS here; the architecture comes only from the OPTIONS argument
# of cuda_add_executable. -lineinfo adds source line information to the
# device code.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -msse2 ")
list(APPEND CUDA_NVCC_FLAGS "-lineinfo;--compiler-options;-O2;-DVERBOSE")
endif()
endif()
# CUDA sources and the headers that accompany them; dynamic.cu is
# intentionally left commented out and is therefore not built.
set(cuda_sources
# dynamic.cu
cudaImage.cu
cudaImage.h
cudaSiftH.cu
cudaSiftH.h
matching.cu
cudaSiftD.h
cudaSift.h
cudautils.h
)
# Plain C++ sources.
set(sources
geomFuncs.cpp
mainSift.cpp
)
# Make headers in the source directory visible to all translation units.
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}
)
# Build with separable compilation (relocatable device code); this is why the
# CUDA device runtime library is linked below.
SET(CUDA_SEPARABLE_COMPILATION ON)
# -arch=sm_20 targets compute capability 2.0 devices. It must match the GPU
# the binary runs on, otherwise device symbols cannot be resolved at run time.
cuda_add_executable(cudasift ${cuda_sources} ${sources} OPTIONS -arch=sm_20)
# EXTRA_CXX_FLAGS is only set on Windows and macOS; elsewhere this expands to
# an empty string.
set_target_properties(cudasift PROPERTIES
  COMPILE_FLAGS "${EXTRA_CXX_FLAGS}"
)
# Prefer the device runtime library that FindCUDA located, so it belongs to
# the same toolkit as the nvcc in use. Fall back to the conventional install
# path when FindCUDA does not provide the variable (older CMake) or could not
# find the library.
if (CUDA_cudadevrt_LIBRARY)
  set(cudasift_DEVRT_LIBRARY "${CUDA_cudadevrt_LIBRARY}")
else()
  set(cudasift_DEVRT_LIBRARY /usr/local/cuda/lib64/libcudadevrt.a)
endif()
target_link_libraries(cudasift
  ${cudasift_DEVRT_LIBRARY} ${OpenCV_LIBS}
)
# Install the sources, this build script and the copyright notice into the
# root of the install prefix. cudaSiftD.cu is installed here although it is
# not listed in cuda_sources.
install(FILES
${cuda_sources}
${sources}
cudaSiftD.cu
CMakeLists.txt
Copyright.txt
DESTINATION .
)
# Install the two sample images into data/.
# NOTE(review): the second file is spelled "righ.pgm" here -- confirm this
# matches the file name on disk before changing it.
install(FILES data/left.pgm data/righ.pgm
DESTINATION data
)