(Windows 10,CUDA 8.0,VS 2015,CMake 3.7.0)
我在使用CUDA内核构建库时遇到问题。
我的项目有一个顶级的CMakeLists.txt,其中包含一个子CMakeLists.txt。
在顶层:
# Top-level build script for the "robot" project.
cmake_minimum_required(VERSION 3.0)
project (robot)
# Locate the CUDA toolkit through the FindCUDA module; REQUIRED makes
# configuration fail if no toolkit is found.
find_package(CUDA REQUIRED)
#...some more stuff
# include() processes the subproject's CMakeLists.txt in the current scope
# (no new directory scope is created, unlike add_subdirectory()), so
# PROJECT_SOURCE_DIR inside that file still refers to this top-level directory.
include(${PROJECT_SOURCE_DIR}/projects/subproject/CMakeLists.txt)
然后,在子项目CMakeLists.txt中:
# Build script for the "subproject" static library (C++ and CUDA sources).
# Root directory of the subproject, relative to the top-level project.
set(SUBPROJECT_SOURCE_DIR ${PROJECT_SOURCE_DIR}/projects/subproject)
# Recursively collect C++ headers/sources and CUDA sources/headers.
file(GLOB_RECURSE SUBPROJECT_HEADER ${SUBPROJECT_SOURCE_DIR}/*.h)
file(GLOB_RECURSE SUBPROJECT_SOURCE ${SUBPROJECT_SOURCE_DIR}/*.cpp)
file(GLOB_RECURSE SUBPROJECT_CUDA ${SUBPROJECT_SOURCE_DIR}/*.cu)
file(GLOB_RECURSE SUBPROJECT_CUDA_HEADER ${SUBPROJECT_SOURCE_DIR}/*.cuh)
# cuda_add_library() (from FindCUDA) creates the static library target and
# routes the .cu files through nvcc.
cuda_add_library(subproject STATIC ${SUBPROJECT_HEADER} ${SUBPROJECT_SOURCE} ${SUBPROJECT_CUDA_HEADER} ${SUBPROJECT_CUDA})
# The include/ directory is only needed to compile the library itself (PRIVATE).
target_include_directories(subproject PRIVATE ${SUBPROJECT_SOURCE_DIR}/include)
#some other includes and target_includes here...
# NOTE(review): cuda_add_library() links the CUDA runtime with the keyword-less
# target_link_libraries() signature, so this call presumably has to stay
# keyword-less too (CMake rejects mixing both signatures on one target) —
# confirm against the FindCUDA version in use.
target_link_libraries(subproject <some links here>)
在我的源目录中,我有一个.cuh和一个.cu文件。这些基于简单的VectorAdd测试:
kernel.cuh:
// kernel.cuh - declarations for the VectorAdd test kernel and its host driver.
//
// The include guard keeps the header safe to include from several translation
// units or through more than one include path.
#ifndef KERNEL_CUH
#define KERNEL_CUH

#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>

// Element-wise integer addition on the device: c[i] = a[i] + b[i] for i < n.
// a, b and c are pointers the kernel dereferences on the device; n is the
// number of elements to process.
__global__ void VectorAdd(int *a, int *b, int *c, int n);

// Host-side smoke test: adds two small integer vectors with VectorAdd and
// prints the first results to stdout. Returns 0 on success.
int test();

#endif // KERNEL_CUH
和kernel.cu:
#include "kernel.cuh"
#define SIZE 1024
// Element-wise vector addition: c[i] = a[i] + b[i] for every i in [0, n).
//
// a, b : input vectors, read on the device (at least n elements each).
// c    : output vector, written on the device (at least n elements).
// n    : number of elements to process; nothing is written when n <= 0.
//
// Launch layout: any 1D grid of 1D blocks. Each thread starts at its global
// index and advances by the total number of launched threads, so every element
// is covered even when the launch has fewer than n threads. For a single-block
// launch with at least n threads this is the same as indexing by threadIdx.x.
// No shared memory is used.
__global__ void VectorAdd(int *a, int *b, int *c, int n) {
    // 64-bit arithmetic so that blockIdx.x * blockDim.x and the stride
    // increment cannot overflow for large grids or n close to INT_MAX.
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         i < n;
         i += stride) {
        c[i] = a[i] + b[i];
    }
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when err is cudaSuccess, false otherwise.
static bool cudaCallOk(cudaError_t err, const char *what) {
    if (err == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Host-side smoke test for VectorAdd.
//
// Fills two SIZE-element host vectors with a[i] = b[i] = i, copies them to the
// device, launches VectorAdd with one block of SIZE threads, copies the result
// back and prints the first 10 sums to stdout.
//
// Returns 0 on success and 1 if a host allocation, a CUDA API call or the
// kernel launch failed (details are printed to stderr). All host and device
// buffers are released on every path.
int test() {
    const size_t bytes = SIZE * sizeof(int);
    int *a = NULL, *b = NULL, *c = NULL;
    int *d_a = NULL, *d_b = NULL, *d_c = NULL;
    int status = 1;  // pessimistic default; set to 0 only after everything succeeded

    a = (int *)malloc(bytes);
    b = (int *)malloc(bytes);
    c = (int *)malloc(bytes);
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "Host memory allocation failed\n");
        goto cleanup;
    }

    if (!cudaCallOk(cudaMalloc(&d_a, bytes), "cudaMalloc(d_a)") ||
        !cudaCallOk(cudaMalloc(&d_b, bytes), "cudaMalloc(d_b)") ||
        !cudaCallOk(cudaMalloc(&d_c, bytes), "cudaMalloc(d_c)")) {
        goto cleanup;
    }

    for (int i = 0; i < SIZE; ++i) {
        a[i] = i;
        b[i] = i;
        c[i] = 0;
    }

    if (!cudaCallOk(cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(a -> d_a)") ||
        !cudaCallOk(cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(b -> d_b)") ||
        !cudaCallOk(cudaMemcpy(d_c, c, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(c -> d_c)")) {
        goto cleanup;
    }

    VectorAdd<<< 1, SIZE >>>(d_a, d_b, d_c, SIZE);
    // A kernel launch returns nothing; an invalid launch configuration is only
    // visible through cudaGetLastError().
    if (!cudaCallOk(cudaGetLastError(), "VectorAdd launch")) {
        goto cleanup;
    }

    // This blocking copy waits for the kernel, so errors raised while the
    // kernel was executing are reported here.
    if (!cudaCallOk(cudaMemcpy(c, d_c, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(d_c -> c)")) {
        goto cleanup;
    }

    for (int i = 0; i < 10; ++i) {
        printf("c[%d] = %d\n", i, c[i]);
    }
    status = 0;

cleanup:
    // free(NULL) and cudaFree(NULL) perform no operation, so buffers that were
    // never allocated are safe to pass here.
    free(a);
    free(b);
    free(c);
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
    return status;
}
我在项目的其他 C++ 文件中包含了 kernel.cuh。尝试编译解决方案时,我收到以下错误:
Severity Code Description Project File Line Suppression State
Error LNK1112 module machine type 'X86' conflicts with target machine type 'x64' subproject C:\path_to_proj\build\CMakeFiles\subproject.dir\projects\subproject\src\Release\subproject_generated_kernel.cu.obj 1
这随后又导致找不到 .lib 文件。这个错误的原因是什么?我需要在 CMakeLists 中添加一些内容吗?
答案 0 :(得分:3)
我不确定这是否是您问题的正确解决方案,但它对我有效;我使用的配置与您几乎相同(Windows 10,Visual Studio 13,CUDA 8.0,CMake 3.7)。
在我的 CMake 文件中,除了 find_package(CUDA) 命令之外,我还有以下代码(也许第一行能解决你的错误信息):
# Force nvcc to build device code in 64-bit mode so the generated objects match
# an x64 link target. FindCUDA declares CUDA_64_BIT_DEVICE_CODE as a boolean
# option, so the cache entry is typed BOOL; FORCE overwrites any stale value
# already stored in CMakeCache.txt.
set(CUDA_64_BIT_DEVICE_CODE ON CACHE BOOL "Compile device code in 64 bit mode" FORCE)
以下是我的 CUDA 相关 CMake 配置的其余部分(最后一行可能也有帮助,它强制链接器查找 x64 的 CUDA 库):
# Locate the CUDA samples' helper headers (helper_cuda.h). Only the roots named
# by the SDK/samples environment variables are searched (NO_DEFAULT_PATH).
# The suffixes cover a root that already points at a versioned samples
# directory ("common/inc"), the samples of the toolkit version detected by
# FindCUDA (CUDA_VERSION, e.g. "8.0"), and the original v7.5 locations.
find_path(CUDA_CUT_INCLUDE_DIR
  helper_cuda.h
  PATHS "$ENV{NVSDKCOMPUTE_ROOT}" "$ENV{NVSDKCUDA_ROOT}" "$ENV{NVCUDASAMPLES_ROOT}"
  PATH_SUFFIXES
    "common/inc"
    "CUDA Samples/v${CUDA_VERSION}/common/inc" "v${CUDA_VERSION}/common/inc"
    "CUDA Samples/v7.5/common/inc" "v7.5/common/inc"
  DOC "Location of helper_cuda.h"
  NO_DEFAULT_PATH
)
# Make the toolkit headers and the samples' helper headers visible to all targets.
include_directories(${CUDA_INCLUDE_DIRS} ${CUDA_CUT_INCLUDE_DIR})
# Point the linker at the toolkit's 64-bit import libraries. The path is built
# from CUDA_TOOLKIT_ROOT_DIR (set by FindCUDA) instead of appending "/../lib/x64"
# to the CUDA_INCLUDE_DIRS list, and is quoted because the default Windows
# install location contains spaces.
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib/x64")
我希望其中一些可以帮到你。祝你好运!