我尝试使用2个不同的流,如“CUDA By Example”一书中所示,并将其应用于某些代码,不幸的是,当我运行它时,我获得了分段错误。我以前编写过没有流的代码,但是它工作正常,但我在这里看不到问题。
#include <cuda.h>
#include <stdio.h>
__global__
void GPU(node *tree ,char *data,int *out){
int tid = blockIdx.x * blockDim.x + threadIdx.x;
}
void streamTest(wrapp * wrap, char *data){
int size = wrap->size;
nodes *tree = wrap->nodes;
char *data_d0;
nodes *tree_d0;
int *out_d0;
char *data_d1;
nodes *tree_d1;
int *out_d1;
char *data_h;
nodes *tree_h;
int *out_h;
const int N = 100000000/100;
cudaStream_t stream0, stream1;
cudaMalloc((void **)&data_d0, N * sizeof(char));
cudaMalloc((void **)&tree_d0, (wrap->size*sizeof(nodes)));
cudaMalloc((void **)&out_d0, sizeof(int));
cudaMalloc((void **)&data_d1, N * sizeof(char));
cudaMalloc((void **)&tree_d1, (wrap->size*sizeof(nodes)));
cudaMalloc((void **)&out_d1, sizeof(int));
cudaHostAlloc((void**)&data_h, 100000000*(sizeof(char)),cudaHostAllocDefault);
cudaHostAlloc((void**)&tree_h, wrap->size*(sizeof(nodes)),cudaHostAllocDefault);
cudaHostAlloc((void**)&out_h, (sizeof(int)),cudaHostAllocDefault);
int x;
for(x=0; x<100000000; x++){
data_h[x] = data_h[x];
}
int z;
for(x=0; x<wrap->size;x++){
tree_h[x].value = wrap->nodes[x].value;
for(z=0; z<32; z++){
tree_h[x].array[z] = wrap->nodes[x].array[z];
}
}
for(x=0; x<100000000; x+=N*2){
cudaMemcpyAsync(data_d0, data_h+x, N*sizeof(char), cudaMemcpyHostToDevice, stream0);
cudaMemcpyAsync(data_d1, data_h+x+N, N*sizeof(char), cudaMemcpyHostToDevice, stream1);
cudaMemcpyAsync(tree_d0, tree_h, wrap->size*sizeof(nodes), cudaMemcpyHostToDevice, stream0);
cudaMemcpyAsync(tree_d1, tree_h, wrap->size*sizeof(nodes), cudaMemcpyHostToDevice, stream1);
GPU<<<256,256,0,stream0>>>(tree_d0, data_d0, out_d0 );
GPU<<<256,256,0,stream1>>>(tree_d1, data_d1, out_d1);
}
}
int main(void){
char *data = (char *)calloc('a', 100000000 *sizeof(char));
nodes *node = (nodes *) malloc(sizeof(nodes));
streamTest(wrap, data);
}
当我尝试运行时,我获得:
./a.out
Segmentation fault (core dumped)
当我检查var/log/kern.log
时,我可以看到以下内容:
a.out[20204]: segfault at 4 ip 00007fd26303f92c sp 00007fff7694efb8 error 4 in libcuda.so.331.49[7fd262e09000+b6f000]
答案 0 :(得分:2)
要使用streams,您必须先创建它们。
当我像这样修改你的代码时:
cudaStream_t stream0, stream1;
cudaStreamCreate(&stream0); // add this line
cudaStreamCreate(&stream1); // add this line
对我来说,段错误消失了。