我想从函数返回一个dim3对象。 具体代码是:
dim3 getGridBasedOnBlockSize(int width, int height, int block_size) {
int gridX = (int)ceil((float)width / block_size);
int gridY = (int)ceil((float)height / block_size);
return dim3(gridX, gridY);
但是编译时会出现如下错误:
error: expected expression before ‘dim3’
return dim3(gridX, gridY);
^
所以我稍微改了一下:
dim3 getGridBasedOnBlockSize(int width, int height, int block_size) {
int gridX = (int)ceil((float)width / block_size);
int gridY = (int)ceil((float)height / block_size);
dim3 gridXY(gridX, gridY);
return gridXY;
但现在它说:
error: incompatible types when returning type ‘dim3 (*)()
{aka struct dim3 (*)()}’ but ‘dim3 {aka struct dim3}’ was expected
return gridXY;
^
你可以帮帮我吗?这是什么意思,我怎样才能解决这个问题,以便用nvcc正确编译?谢谢!
此.c文件的完整代码是:
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#define WARP_SIZE 16
#define DEBUG false
/*
 * Allocates a device buffer of src_size floats and copies src into it.
 *
 * src      - host array holding at least src_size floats.
 * src_size - number of floats to copy.
 *
 * Returns the device pointer on success. Returns NULL when src is NULL,
 * src_size is not positive, or a CUDA call fails; in the failure case the
 * error is printed to stderr and any device allocation made here is released.
 * The caller owns the returned buffer and must release it with cudaFree().
 */
float *_copyHostDevice(float *src, int src_size) {
    float *src_d = NULL;
    size_t bytes;
    cudaError_t err;

    if (src == NULL || src_size <= 0) {
        return NULL;
    }

    /* Widen before multiplying so the byte count cannot overflow int. */
    bytes = sizeof(float) * (size_t)src_size;

    err = cudaMalloc((void**)&src_d, bytes);
    if (err != cudaSuccess) {
        fprintf(stderr, "_copyHostDevice: cudaMalloc failed: %s\n",
                cudaGetErrorString(err));
        return NULL;
    }

    err = cudaMemcpy(src_d, src, bytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        fprintf(stderr, "_copyHostDevice: cudaMemcpy failed: %s\n",
                cudaGetErrorString(err));
        /* Do not leak the device buffer when the copy fails. */
        cudaFree(src_d);
        return NULL;
    }

    return src_d;
}
/*
 * Copies src_size floats from the device buffer src back to the host.
 *
 * src      - device pointer to read from.
 * src_size - number of floats to copy.
 * dst      - host destination; when NULL a buffer is allocated with malloc()
 *            and ownership passes to the caller (release with free()).
 *
 * Returns the host pointer that received the data (dst, or the new buffer).
 * Returns NULL when src is NULL, src_size is negative, the host allocation
 * fails, or cudaMemcpy fails; the error is printed to stderr. A buffer
 * allocated here is freed before returning NULL; a caller-supplied dst is
 * never freed.
 */
float *_copyDeviceHost(float *src, int src_size, float *dst) {
    float *target = dst;
    size_t bytes;
    cudaError_t err;

    if (src == NULL || src_size < 0) {
        fprintf(stderr, "_copyDeviceHost: invalid source or size\n");
        return NULL;
    }

    /* Widen before multiplying so the byte count cannot overflow int. */
    bytes = sizeof(float) * (size_t)src_size;

    if (target == NULL) {
        target = (float*)malloc(bytes);
        if (target == NULL) {
            fprintf(stderr, "_copyDeviceHost: host allocation failed\n");
            return NULL;
        }
    }

    err = cudaMemcpy(target, src, bytes, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) {
        fprintf(stderr, "_copyDeviceHost: cudaMemcpy failed: %s\n",
                cudaGetErrorString(err));
        /* Only release memory this function allocated itself. */
        if (dst == NULL) {
            free(target);
        }
        return NULL;
    }

    return target;
}
/* Pair of integer x/y coordinates; this is the return type of getGlobalDim(). */
typedef struct {
int x; /* x component */
int y; /* y component */
} GlobalDim;
/*
 * Combines a block size, a block index and a thread index into a pair of
 * x/y coordinates: for each of the two axes the result is
 * blockDim * blockIdx + threadIdx. The z components are not used.
 *
 * The products are computed on the unsigned dim3 fields and then stored
 * into the int fields of GlobalDim.
 */
__device__ GlobalDim getGlobalDim(dim3 blockDim, dim3 blockIdx, dim3 threadIdx) {
    GlobalDim coords;

    coords.y = blockIdx.y * blockDim.y + threadIdx.y;
    coords.x = blockIdx.x * blockDim.x + threadIdx.x;

    return coords;
}
/*
 * Computes how many square blocks of side block_size are needed to cover a
 * width x height area, rounding up in each dimension.
 *
 * width, height - extent to cover; expected to be non-negative.
 * block_size    - block side length; must be positive.
 *
 * Returns a dim3 with x = ceil(width / block_size),
 * y = ceil(height / block_size) and z = 1.
 *
 * This file is built as C, where dim3 is a plain struct: the C++ forms
 * `dim3 g(x, y)` and `return dim3(x, y)` do not compile, so the fields are
 * assigned one by one. z must be set explicitly because there is no
 * constructor to default it to 1.
 */
dim3 getGridBasedOnBlockSize(int width, int height, int block_size) {
    dim3 grid;

    /* Integer ceil-division: exact for every int, unlike the float route,
       and written so that it cannot overflow for large extents. */
    grid.x = (unsigned int)(width / block_size + (width % block_size != 0));
    grid.y = (unsigned int)(height / block_size + (height % block_size != 0));
    grid.z = 1;

    return grid;
}
/*
 * Suspends the calling thread for n seconds.
 *
 * n - number of whole seconds to wait; zero or negative values return
 *     immediately.
 *
 * Uses sleep() rather than usleep(n * 1000000): the multiplication overflows
 * int for n > 2147, a negative n wraps to a huge unsigned delay, and POSIX
 * allows usleep() to reject arguments of 1,000,000 or more.
 * As with the previous implementation, the wait may end early if a signal
 * is delivered.
 */
void _sleep(int n) {
    if (n <= 0) {
        return;
    }
    sleep((unsigned int)n);
}
/*
 * Prints a matrix to stdout, one matrix row per output line.
 *
 * m      - elements stored row by row; element (row, col) is read from
 *          m[row * width + col].
 * width  - number of columns.
 * height - number of rows.
 *
 * Every value is written with "%f" followed by a single space, and each row
 * is terminated by a newline.
 */
void drawMatrix(float *m, int width, int height) {
    int row = 0;

    while (row < height) {
        int col = 0;

        while (col < width) {
            printf("%f ", m[row * width + col]);
            ++col;
        }
        printf("\n");
        ++row;
    }
}
编译命令和结果:
$ nvcc -Wno-deprecated-gpu-targets -o project4 nn.cu parallel.cu utils.c
utils.c: In function ‘getGridBasedOnBlockSize’:
utils.c:48:5: warning: parameter names (without types)
in function declaration
dim3 gridXY(gridX, gridY);
^
utils.c:49:12: error: incompatible types when returning
type ‘dim3 (*)() {aka struct dim3 (*)()}’ but ‘dim3 {aka struct dim3}’ was expected
return gridXY;
^
对于 return dim3(...) 这种写法,编译器显示:
nvcc -Wno-deprecated-gpu-targets -o project4 nn.cu parallel.cu utils.c
utils.c: In function ‘getGridBasedOnBlockSize’:
utils.c:48:12: error: expected expression before ‘dim3’
return dim3(gridX, gridY);
^
EDIT1: @Zindarod
使用
dim3 gridXY;
gridXY.x = gridX;
gridXY.y = gridY;
return gridXY;
而不是
dim3 gridXY(gridX, gridY);
return gridXY;
按照你的建议修改之后,不幸的是又出现了下面的链接错误:
$ nvcc -Wno-deprecated-gpu-targets -o project4 nn.cu parallel.cu utils.c
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `_copyHostDevice':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x16): multiple definition of `_copyHostDevice'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x16): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `_copyDeviceHost':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x8c): multiple definition of `_copyDeviceHost'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x8c): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `getGlobalDim':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0xed): multiple definition of `getGlobalDim'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0xed): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `getGridBasedOnBlockSize':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x120): multiple definition of `getGridBasedOnBlockSize'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x120): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `_sleep':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x1de): multiple definition of `_sleep'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x1de): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `drawMatrix':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x1fc): multiple definition of `drawMatrix'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x1fc): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `setWeightsForLayers(float*, float*, float*, float*, int, int)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x27b): multiple definition of `setWeightsForLayers(float*, float*, float*, float*, int, int)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x27b): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `updateWeightsCUDA(float*, float*, float*, float*, int, int)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x885): multiple definition of `updateWeightsCUDA(float*, float*, float*, float*, int, int)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x12b2): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `update_layer(float*, float*, int, int, float*)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x3fa): multiple definition of `update_layer(float*, float*, int, int, float*)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x3fa): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `mapStepCUDA(float*, float*, float*, int, int)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x9ad): multiple definition of `mapStepCUDA(float*, float*, float*, int, int)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x13da): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `reduceStepCUDA(float*, float*, int, int)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0xa9d): multiple definition of `reduceStepCUDA(float*, float*, int, int)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x14ca): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `__device_stub__Z17updateWeightsCUDAPfS_S_S_ii(float*, float*, float*, float*, int, int)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x77a): multiple definition of `__device_stub__Z17updateWeightsCUDAPfS_S_S_ii(float*, float*, float*, float*, int, int)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x11a7): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `__device_stub__Z11mapStepCUDAPfS_S_ii(float*, float*, float*, int, int)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x8cd): multiple definition of `__device_stub__Z11mapStepCUDAPfS_S_ii(float*, float*, float*, int, int)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x12fa): first defined here
/tmp/tmpxft_00007384_00000000-29_parallel.o: In function `__device_stub__Z14reduceStepCUDAPfS_ii(float*, float*, int, int)':
tmpxft_00007384_00000000-9_parallel.cudafe1.cpp:(.text+0x9e8): multiple definition of `__device_stub__Z14reduceStepCUDAPfS_ii(float*, float*, int, int)'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x1415): first defined here
/tmp/tmpxft_00007384_00000000-30_utils.o: In function `_copyHostDevice':
utils.c:(.text+0x0): multiple definition of `_copyHostDevice'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x16): first defined here
/tmp/tmpxft_00007384_00000000-30_utils.o: In function `_copyDeviceHost':
utils.c:(.text+0x76): multiple definition of `_copyDeviceHost'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x8c): first defined here
/tmp/tmpxft_00007384_00000000-30_utils.o: In function `getGlobalDim':
utils.c:(.text+0xd7): multiple definition of `getGlobalDim'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0xed): first defined here
/tmp/tmpxft_00007384_00000000-30_utils.o: In function `getGridBasedOnBlockSize':
utils.c:(.text+0x11d): multiple definition of `getGridBasedOnBlockSize'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x120): first defined here
/tmp/tmpxft_00007384_00000000-30_utils.o: In function `_sleep':
utils.c:(.text+0x1a5): multiple definition of `_sleep'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x1de): first defined here
/tmp/tmpxft_00007384_00000000-30_utils.o: In function `drawMatrix':
utils.c:(.text+0x1c3): multiple definition of `drawMatrix'
/tmp/tmpxft_00007384_00000000-21_nn.o:tmpxft_00007384_00000000-4_nn.cudafe1.cpp:(.text+0x1fc): first defined here
collect2: error: ld returned 1 exit status
答案 0 :(得分:0)
将这一行:
dim3 gridXY(gridX, gridY);
替换为:
dim3 gridXY;
gridXY.x = gridX, gridXY.y = gridY;