我是CUDA的新手,当我遇到访问冲突读取位置运行时异常时,我正试图制作一个模糊.tga文件的简单程序。因为我对CUDA很陌生,所以我无法弄清楚如何修复它并且google不是很有帮助。 这是代码:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <math.h>
#include <time.h>
#define println(...) std::cout << __VA_ARGS__ << "\n";
#define WIDTH 1920
#define HEIGHT 1080
#define BLUR 5
unsigned char in[HEIGHT][WIDTH][3];
unsigned char out[HEIGHT][WIDTH][3];
unsigned char header[18];
void read(char input[256]) {
FILE* f;
f = fopen(input, "rb");
if (!f) {
printf("File Reading Failed\n");
}
fread(&header, 1, 18, f);
fread(&in, 1, HEIGHT*WIDTH * 3, f);
fclose(f);
}
void write(char output[256]) {
FILE* fw;
fw = fopen(output, "wb+");
if (!fw) {
printf("File Writing Failed\n");
}
header[16] = 24;
header[13] = WIDTH / 256;
header[12] = WIDTH % 256;
header[15] = HEIGHT / 256;
header[14] = HEIGHT % 256;
fwrite(&header, 1, 18, fw);
fwrite(&out, 1, HEIGHT*WIDTH * 3, fw);
fclose(fw);
}
__device__
int toIndex(int x, int y) {
return x + y / WIDTH;
}
__device__
void doPixel(int x, int y, char *red, char *green, char *blue) {
int avgRed = 0;
int avgGreen = 0;
int avgBlue = 0;
for (int i = -BLUR; i <= BLUR; i++) {
for (int j = -BLUR; j <= BLUR; j++) {
avgRed += red[toIndex(i, j)];
avgBlue += blue[toIndex(i, j)];
avgGreen += green[toIndex(i, j)];
}
}
red[toIndex(x, y)] = avgRed / (BLUR*BLUR);
green[toIndex(x, y)] = avgGreen / (BLUR*BLUR);
blue[toIndex(x, y)] = avgBlue / (BLUR*BLUR);
}
__global__
void setValue(char *red, char *green, char *blue) {
int x;
int y;
for (int i = threadIdx.x; i < WIDTH * HEIGHT; i += 1024) {
x = i % WIDTH;
y = i / WIDTH;
doPixel(x, y, red, green, blue);
}
}
int main(void) {
char *red, *green, *blue;
double time;
read("test.tga");
cudaMallocManaged(&red, WIDTH * HEIGHT);
cudaMallocManaged(&green, WIDTH * HEIGHT);
cudaMallocManaged(&blue, WIDTH * HEIGHT);
for (int x = 0; x < WIDTH; x++) {
for (int y = 0; y < HEIGHT; y++) {
red[x + y*WIDTH] = in[y][x][2];
green[x + y*WIDTH] = in[y][x][1];
blue[x + y*WIDTH] = in[y][x][0];
}
}
time = clock();
setValue<<<1, 1024>>>(red, green, blue);
cudaDeviceSynchronize();
println((clock() - time) / CLOCKS_PER_SEC);
int x;
int y;
for (int i = 0; i < WIDTH * HEIGHT; i++) {
x = i % WIDTH;
y = i / WIDTH;
out[y][x][0] = blue[i]; //Program gives error here
out[y][x][1] = green[i];
out[y][x][2] = red[i];
}
write("test.tga");
cudaFree(red);
cudaFree(green);
cudaFree(blue);
getchar();
}
我读到cudaDeviceSynchronize()是修复此问题的方法,但这似乎不起作用。 cudaThreadSynchronize()也无法解决问题。
答案 0 :(得分:1)
查找非法内存访问错误的最简单方法是使用cuda-gdb
运行二进制文件。确保在编译时提供-g -G -O0
标志
在您的情况下,您可能会在此代码段中发现一些错误
for (int i = -BLUR; i <= BLUR; i++) {
for (int j = -BLUR; j <= BLUR; j++) {
avgRed += red[toIndex(i, j)];
avgBlue += blue[toIndex(i, j)];
avgGreen += green[toIndex(i, j)];
}
}