我曾经使用CUDA 10.0版。
我计算浮点值。但是计算结果不是有效值。
我的代码是
__global__ void ict_kernel(int *imgData_0, int *imgData_1, int *imgData_2, int range, int tile, int shift)
{
int n = threadIdx.x + blockIdx.x * 256 + tile * MAX_BLOCKS * 256;
if (n < range) { //more threads than pixels, therefore check if in range
//float Y, C_r, C_b;
float *imgData_f_0;
float *imgData_f_1;
float *imgData_f_2;
float conv_x, conv_y, conv_z, *before_conv_x, *before_conv_y, *before_conv_z , *after_conv_x, *after_conv_y, *after_conv_z;
float *after_conv_x2, *after_conv_y2, *after_conv_z2, *after_conv_x3, *after_conv_y3, *after_conv_z3;
float DCI_DEGAMMA = 0.3846153f;
float DCI_COEFFICENT = 0.916555f; //48.0 / 52.37;
float COLOR_DEPTH= 4095.0f;
float HEADROOM= 256;imgData_f_0 = (float*)imgData_0;
imgData_f_1 = (float*)imgData_1;
imgData_f_2 = (float*)imgData_2;
before_conv_x = (float*)imgData_0;
before_conv_y = (float*)imgData_1;
before_conv_z = (float*)imgData_2;
after_conv_x = (float*)imgData_0;
after_conv_y = (float*)imgData_0;
after_conv_z = (float*)imgData_0;
after_conv_x2 = (float*)imgData_0;
after_conv_y2 = (float*)imgData_0;
after_conv_z2 = (float*)imgData_0;
after_conv_x3 = (float*)imgData_0;
after_conv_y3 = (float*)imgData_0;
after_conv_z3 = (float*)imgData_0;
- 这是声明
这是计算代码
before_conv_x[n] = imgData_0[n] / COLOR_DEPTH;
before_conv_y[n] = imgData_1[n] / COLOR_DEPTH;
before_conv_z[n] = imgData_2[n] / COLOR_DEPTH;
// gamma sRGB 2.4
if (before_conv_x[n] > 0.04045f) {
before_conv_x[n] = powf((before_conv_x[n] + 0.055f) / 1.055f, 2.4f);
}
else {
before_conv_x[n] = before_conv_x[n] / 12.92f;
}
if (before_conv_y[n] > 0.04045f) {
before_conv_y[n] = powf((before_conv_y[n] + 0.055f) / 1.055f, 2.4f);
}
else {
before_conv_y[n] = before_conv_y[n] / 12.92f;
}
if (before_conv_z[n] > 0.04045f) {
before_conv_z[n] = powf((before_conv_z[n] + 0.055f) / 1.055f, 2.4f);
}
else {
before_conv_z[n] = before_conv_z[n] / 12.92f;
}
after_conv_x[n] = 0.4124564f *(float)before_conv_x[n] + 0.3575761f *(float)before_conv_y[n] + 0.1804375f *(float)before_conv_z[n];
after_conv_y[n] = 0.2126729f *(float)before_conv_x[n] + 0.7151522f *(float)before_conv_y[n] + 0.072175f *(float)before_conv_z[n];
after_conv_z[n] = 0.0193339f *(float)before_conv_x[n] + 0.119192f *(float)before_conv_y[n] + 0.9503041f *(float)before_conv_z[n];
after_conv_x2[n] = powf((float)after_conv_x[n] * (float)DCI_COEFFICENT, (float)DCI_DEGAMMA);
after_conv_y2[n] = powf((float)after_conv_y[n] * (float)DCI_COEFFICENT, (float)DCI_DEGAMMA);
after_conv_z2[n] = powf((float)after_conv_z[n] * (float)DCI_COEFFICENT, (float)DCI_DEGAMMA);
after_conv_x3[n] = after_conv_x2[n] * COLOR_DEPTH;
after_conv_y3[n] = after_conv_y2[n] * COLOR_DEPTH;
after_conv_z3[n] = after_conv_z2[n] * COLOR_DEPTH;
我的预期结果是0到4095,但是值是65555555。
传入数据范围为0到4095
此代码是RGB到XYZ的转换矩阵。
这是cuda powf问题吗?
我的猜测是 * COLOR_DEPTH 中的值发生奇怪的变化。
之前的值是0到1之间的数字。
出什么问题了?
谢谢。