CUDA错误消息:未指定的启动失败

时间:2012-03-28 05:49:05

标签: c cuda

这是我的CUDA代码的一部分。但是这段代码的最后一部分报告了一条错误消息。

// Device-side result buffers (allocated with cudaMalloc below).
unsigned int *mat_count;
off_t *mat_position;
// Host-side mirrors of the result buffers (allocated with malloc below).
unsigned int *matches_count;
off_t *matches_position;
......
// Reserve room for 10 counters and 10 positions on the device.
// NOTE(review): the return codes of these cudaMalloc calls are not checked.
cudaMalloc ( (void **) &mat_count,    sizeof(unsigned int)*10);
cudaMalloc ( (void **) &mat_position, sizeof(off_t)*10);
......
// Allocate the host mirrors and zero all 10 entries of each.
matches_count    = (unsigned int *)malloc(sizeof(unsigned int)*10);
matches_position = (off_t *)malloc(sizeof(off_t)*10);
for ( i = 0 ; i < 10 ; i++ ) {
    matches_count   [i] = 0;
    matches_position[i] = 0;
}
......
// Upload the zeroed host buffers so the device buffers start at 0.
// NOTE(review): the return codes of these cudaMemcpy calls are not checked.
cudaMemcpy (mat_count,    matches_count   , sizeof(unsigned int)*10, cudaMemcpyHostToDevice );
cudaMemcpy (mat_position, matches_position, sizeof(off_t)*10,        cudaMemcpyHostToDevice );
......
// Launch the kernel with BLK_SIZE blocks of THR_SIZE threads each.
// mat_count / mat_position are the device buffers the kernel writes its results to.
match<<<BLK_SIZE,THR_SIZE>>>(
        reference_total,
        indextable_total,
        sequences, 
        start_sequence, 
        sequence_length, 
        end_sequence,
        ref_base,
        idx_base,
        msk_base,
        mat_count,
        mat_position,
        reference,
        first_indexes,
        seqmaskc
        );
// Checked immediately after the launch, this only reports launch-time errors
// (e.g. a bad launch configuration). A fault that happens while the kernel is
// executing is reported by a later synchronizing call, such as the blocking
// cudaMemcpy calls below.
err=cudaGetLastError();
if(err!=cudaSuccess)
{
printf("\n1 %s\n", cudaGetErrorString(err));
}
// Copy the match counters back to the host. An error returned here may
// originate from the kernel execution rather than from the copy itself.
err=    cudaMemcpy (matches_count   , mat_count,    sizeof(unsigned int)*10, cudaMemcpyDeviceToHost );
if(err!=cudaSuccess)
{
printf("\n2 %s\n", cudaGetErrorString(err));
}
// Copy the match positions back to the host; same caveat as above.
err=    cudaMemcpy (matches_position, mat_position, sizeof(off_t)*10, cudaMemcpyDeviceToHost );
if(err!=cudaSuccess)
{
printf("\n3 %s\n", cudaGetErrorString(err));
}

下面这部分代码报告了“未指定的启动失败”(unspecified launch failure)错误消息。我不知道为什么会报告此错误。

// Blocking device-to-host copy of the 10 recorded positions.
// An error returned here is not necessarily caused by the copy: a fault raised
// while the previously launched kernel was executing is reported by the next
// synchronizing call, and an earlier unhandled error also makes later calls fail.
err=cudaMemcpy (matches_position, mat_position, sizeof(off_t)*10, cudaMemcpyDeviceToHost );
if(err!=cudaSuccess)
{
printf("\n3 %s\n", cudaGetErrorString(err));
}

以下是 match 函数(内核)的一部分。

// Kernel: for every index entry p in [start_p, last_p) and every sequence s in
// [start_sequence, end_sequence), run a mask pre-check and then a byte-wise
// comparison of the sequence against reference data; on a match, bump
// mat_count[s - start_sequence] and store the position in mat_position.
// The parameter list and parts of the body are elided in this excerpt.
__global__ void match(...)
{
    ......
// Per-thread scratch pointer: offsets `reference` by 32 elements per global
// thread index. Uses THR_SIZE (not blockDim.x), so it assumes the kernel is
// launched with THR_SIZE threads per block.
// NOTE(review): confirm that `reference` is large enough for
// (number of blocks * THR_SIZE * 32) elements.
reference_blk = (THR_SIZE * blockIdx.x + threadIdx.x) * 32 + reference;
......
//-- added for parallelization --//
// start_p / last_p are set in the elided code above.
for (p = start_p ; p != last_p ; p++) {
    // Walk all sequences; `sequence` advances by sequence_bytes per step.
    for ( s = start_sequence, sequence = sequences ; s != end_sequence ;
            s++, sequence += sequence_bytes ) {
        // Reference offset for this index entry: idx_base is read as an array
        // of unsigned int, indexed by p.
        // NOTE(review): p is not visibly bounds-checked against the index table size.
        ref_off = *(((unsigned int*)(idx_base)) + p);

        shifted_in = 0;

        // ---- Mask pre-check ----
        // Three cases, depending on how (first_indexes[..] % 8) compares with
        // (ref_off % 8). Each case loads an unsigned long from msk_base, XORs it
        // with the unsigned long stored at seqmaskc + 16 * (s-start_sequence),
        // and skips to the next sequence (`continue`) when the result is non-zero.
        // NOTE(review): the addresses computed from msk_base are cast to
        // unsigned long* without a visible alignment guarantee, and the
        // subtractions can yield an offset before the start of msk_base when
        // ref_off is smaller than first_indexes[..] — confirm both.
        if((int)(first_indexes[s-start_sequence] % 8 - ref_off % 8) < 0){
            int shamt2 = (ref_off % 8 - first_indexes[s-start_sequence] % 8);

            mask_buffer = *((unsigned long *)(msk_base + (ref_off - first_indexes[s-start_sequence])/8)) >> shamt2;

            if( ( (*(unsigned long *)(seqmaskc + 16 * (s-start_sequence))) ^ mask_buffer ) << shamt2) continue;
        }
        else if((int)(first_indexes[s-start_sequence] % 8 - ref_off % 8) == 0){
            mask_buffer = *((unsigned long *)(msk_base + (ref_off)/8));

            if( (*(unsigned long *)(seqmaskc + 16 * (s-start_sequence)) ^ mask_buffer)) continue;
        }
        else{
            int shamt2 = 8 - (first_indexes[s-start_sequence] % 8 - ref_off % 8);

            mask_buffer = *((unsigned long *)(msk_base + (ref_off/8- first_indexes[s-start_sequence]/8) - 1)) >> shamt2;

            if( ( (*(unsigned long *)(seqmaskc + 16 * (s-start_sequence))) ^ mask_buffer ) << shamt2) continue;
        }

        // ---- Full compare ----
        // Again three cases, this time on (first_indexes[..] % 4) versus
        // (ref_off % 4). Each case copies reference bytes into this thread's
        // reference_blk scratch area and compares them byte by byte with the
        // current sequence.
        if((int)(first_indexes[s-start_sequence] % 4 - ref_off % 4) < 0){
            int shamt = (ref_off % 4 - first_indexes[s-start_sequence] % 4) * 2;
            // Copies sequence_bytes bytes into the scratch area.
            // NOTE(review): confirm sequence_bytes never exceeds the per-thread
            // scratch size and that the source range lies inside ref_base.
            memcpy(reference_blk, ref_base + ref_off / 4 - first_indexes[s-start_sequence] / 4, sequence_bytes);
            ......
            //-- instead of memcmp --//
            // v stays 0 only if all sequence_bytes bytes are equal.
            int v = 0;
            char *p1 = (char *)sequence;
            char *p2 = (char *)reference_blk;
            int tmp_asd = sequence_bytes;
            while(tmp_asd!=0){
                v = *(p1++) - *(p2++);
                if(v!=0)
                    break;
                tmp_asd--;
            }

            if(v == 0){
                // NOTE(review): every thread indexes these buffers by s, and the
                // increment is a plain (non-atomic) ++; if several threads can
                // match the same sequence this is a data race. Also confirm that
                // (end_sequence - start_sequence) does not exceed the number of
                // entries allocated for mat_count / mat_position.
                mat_count[s - (int)start_sequence]++;      /* Maintain count */
                mat_position[s - (int)start_sequence] = ref_off-first_indexes[s-start_sequence]; /* Record latest position */
            }
        }
        else if((int)(first_indexes[s-start_sequence] % 4 - ref_off % 4 )== 0){
            // Same copy as in the first case.
            memcpy(reference_blk, ref_base + ref_off / 4 - first_indexes[s-start_sequence] / 4, sequence_bytes);
            .......
            //-- instead of memcmp --//
            int v = 0;
            char *p1 = (char *)sequence;
            char *p2 = (char *)reference_blk;
            int tmp_asd = sequence_bytes;
            while(tmp_asd!=0){
                v = *(p1++) - *(p2++);
                if(v!=0)
                    break;
                tmp_asd--;
            }
            if(v == 0){
                mat_count[s - (int)start_sequence]++;      /* Maintain count */
                mat_position[s - (int)start_sequence] = ref_off-first_indexes[s-start_sequence]; /* Record latest position */
            }
        }
        else
        {
            int shamt = 8 - (first_indexes[s-start_sequence] % 4 - ref_off % 4) * 2;

            // Unlike the other two cases, this copy starts one element earlier
            // (the trailing "- 1") and always copies a fixed 32 bytes.
            // NOTE(review): when ref_off / 4 equals first_indexes[..] / 4 the
            // source starts before ref_base — confirm this cannot happen.
            memcpy(reference_blk, ref_base + ref_off / 4 - first_indexes[s-start_sequence] / 4 - 1, 32);
            ......
            //-- instead of memcmp --//
            int v = 0;
            char *p1 = (char *)sequence;
            char *p2 = (char *)reference_blk;
            int tmp_asd = sequence_bytes;
            while(tmp_asd!=0){
                v = *(p1++) - *(p2++);
                if(v!=0)
                    break;
                tmp_asd--;
            }

            if (v == 0){
                mat_count[s - (int)start_sequence]++;      /* Maintain count */
                mat_position[s - (int)start_sequence] = ref_off-first_indexes[s-start_sequence];/* Record latest position */
            }
        }
    }
}

}

2 个答案:

答案 0 :(得分:37)

未指定的启动失败几乎总是一个段错误。你的内核中有一个索引错误,可能是在访问全局内存时。

我本想仔细查看你的代码,但它有点难以理解......

答案 1 :(得分:27)

使用调试标志 nvcc -G -g 编译您的应用程序,并尝试在 cuda-memcheck 或 cuda-gdb 内运行您的应用程序。它可能会提示您问题出在哪里。

只需运行:

cuda-memcheck ./yourApp