我正在学习如何使用cmake编译同时包含.cpp文件和.cu文件的项目。目前我正在使用一个只有两个文件的玩具示例:main.cpp和kernel.cu。文件内容如下。
main.cpp中:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Defined in kernel.cu: takes the addresses of two host integers and
// writes the values produced by the CUDA kernel back through them.
extern void kernel_wrapper(int *a, int *b);

// Entry point: prints two integers, hands them to kernel_wrapper, and
// prints them again so the effect of the GPU call is visible.
int main(int argc, char *argv[]){
    int first = 2;
    int second = 3;

    printf("Input: a = %d, b = %d\n", first, second);

    int *const firstPtr = &first;
    int *const secondPtr = &second;
    kernel_wrapper(firstPtr, secondPtr);

    printf("Ran: a = %d, b = %d\n", first, second);
    return 0;
}
kernel.cu:
//#include "cuPrintf.cu"
#include <stdio.h>
// Device kernel operating on two single-int buffers.
//   a, b : pointers the kernel dereferences directly (the host wrapper in
//          this file passes cudaMalloc'ed buffers).
// Only threadIdx.x is consulted: the thread with x == 0 adds 10 to *a, the
// thread with x == 1 adds 3 to *b, and every other thread does nothing.
__global__ void kernel(int *a, int *b){
    const int lane = threadIdx.x;
    // Debug aid (disabled): cuPrintf("tx = %d\n", lane);
    if (lane == 0) {
        *a += 10;
    } else if (lane == 1) {
        *b += 3;
    }
    // Any other lane intentionally falls through without touching memory.
}
// Reports a failed CUDA runtime call on stderr.
//   status : value returned by the runtime call
//   what   : short label naming the call, used in the message
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaCallSucceeded(cudaError_t status, const char *what){
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Host-side wrapper: copies *a and *b to the device, launches `kernel` with
// one block of two threads, and copies the results back into *a and *b.
//
// Every runtime call is checked. On the first failure the remaining
// device steps are skipped, the error is printed to stderr, and *a / *b are
// left as they are; the "Output" line and the cleanup still run, so stdout
// and the signature are unchanged from the unchecked version. Previously a
// failed launch (e.g. no kernel image for the installed GPU) went unnoticed
// and the inputs simply came back unmodified.
void kernel_wrapper(int *a, int *b){
    // cudaPrintfInit();
    //cuPrintf("Anything...?");
    printf("Anything...?\n");

    // Initialised to NULL so the cudaFree calls below are safe no-ops when
    // an allocation never happened.
    int *d_1 = NULL, *d_2 = NULL;
    dim3 threads( 2, 1 );
    dim3 blocks( 1, 1 );

    bool ok =
        cudaCallSucceeded(cudaMalloc( (void **)&d_1, sizeof(int) ), "cudaMalloc(d_1)") &&
        cudaCallSucceeded(cudaMalloc( (void **)&d_2, sizeof(int) ), "cudaMalloc(d_2)") &&
        cudaCallSucceeded(cudaMemcpy( d_1, a, sizeof(int), cudaMemcpyHostToDevice ),
                          "cudaMemcpy(a -> d_1)") &&
        cudaCallSucceeded(cudaMemcpy( d_2, b, sizeof(int), cudaMemcpyHostToDevice ),
                          "cudaMemcpy(b -> d_2)");

    if (ok) {
        kernel<<< blocks, threads >>>( d_1, d_2 );
        // A launch does not return a status; launch failures are only
        // visible through cudaGetLastError(). The blocking copies that
        // follow surface errors raised while the kernel was executing.
        ok = cudaCallSucceeded(cudaGetLastError(), "kernel launch") &&
             cudaCallSucceeded(cudaMemcpy( a, d_1, sizeof(int), cudaMemcpyDeviceToHost ),
                               "cudaMemcpy(d_1 -> a)") &&
             cudaCallSucceeded(cudaMemcpy( b, d_2, sizeof(int), cudaMemcpyDeviceToHost ),
                               "cudaMemcpy(d_2 -> b)");
    }

    printf("Output: a = %d\n", a[0]);

    cudaFree(d_1);
    cudaFree(d_2);
    // cudaPrintfDisplay(stdout, true);
    // cudaPrintfEnd();
}
cmake文件的灵感来自这篇文章:CMake script for CUDA 6.0 with C++11
# Build script for the two-file example: kernel.cu is compiled by NVCC into
# a static library, main.cpp by the host C++ compiler, and both are linked
# into the `main` executable.

# Must come first so policies and the toolchain are set up before any
# variables that depend on them are touched.
cmake_minimum_required(VERSION 3.2)
project( CUDAAndCP )

# Host compiler flags. `-pthread` covers both compiling and linking, so the
# former `-lpthread` (a linker input, not a compile flag) is not repeated.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pthread")

find_package(CUDA REQUIRED)

# GPU architecture to generate code for, as the two-digit compute
# capability (30 = Kepler, 52 = Maxwell GTX 9xx, ...). Override with
#   cmake -DCUDAANDCP_GPU_ARCH=52 ..
set(CUDAANDCP_GPU_ARCH "30" CACHE STRING "Compute capability to compile kernels for (e.g. 30, 52)")

# Pass options to NVCC.
# The first -gencode embeds machine code (SASS) for exactly the chosen
# architecture. The second one additionally embeds PTX, which the driver can
# JIT-compile for newer GPUs; with SASS only, the kernel launch fails on any
# GPU of a different generation and the program silently returns the inputs
# unchanged.
set(
  CUDA_NVCC_FLAGS
  ${CUDA_NVCC_FLAGS};
  -O3
  -gencode arch=compute_${CUDAANDCP_GPU_ARCH},code=sm_${CUDAANDCP_GPU_ARCH}
  -gencode arch=compute_${CUDAANDCP_GPU_ARCH},code=compute_${CUDAANDCP_GPU_ARCH}
  --std=c++11
)

# Specify targets and the source files to compile them from.
cuda_add_library(kernel_obj kernel.cu)
cuda_add_executable(main main.cpp)

# Plain (keyword-less) signature on purpose: cuda_add_executable already
# uses it for this target, and the two styles cannot be mixed.
target_link_libraries(main kernel_obj ${CUDA_LIBRARIES})
我确实编译了主二进制文件,但是当我运行它时,结果是
Input: a = 2, b = 3
Anything...?
Output: a = 2
Ran: a = 2, b = 3
而不是
Input: a = 2, b = 3
Anything...?
Output: a = 12
Ran: a = 12, b = 6
我通过运行以下命令获得了正确的main二进制文件:
g++ -c main.cpp
nvcc -c kernel.cu
nvcc -o main main.o kernel.o
因此,似乎cuda文件没有被正确链接到main二进制文件中。我真的不明白为什么会这样,非常感谢任何帮助!
我在ubuntu 14.04,cuda 7.5和cmake 3.2.0上运行。
答案 0 :(得分:0)
我相信代码生成有些棘手。我的平台与你的平台相同,我使用的是GTX 980.
真正的问题是 `-gencode arch=compute_30,code=sm_30;`。如果将其中的30更改为52,则一切正常。
如果使用裸命令行构建,则CUDA 7.5的 `nvcc` 默认生成 `sm_20` 和 `compute_20` 代码。