我编写了一个opencl内核,用于将标量值转换为颜色,因为在常规python中执行它需要几秒钟。
不幸的是,内核(在其他地方工作的某个逻辑的精简版本)在从uint32转换为uint8 [sic](r,g,b)的三元组时似乎完全失败。
import numpy
import pyopencl as cl
from matplotlib.image import imsave
def generate_kernel(ctx):
return """
float3 hue_to_rgb(float hue)
{
float tmp = (hue-floor(hue))*6;
if (tmp<1) {
return (float3)( 1, tmp, 0);
} else if (tmp<2) {
float s = tmp-1;
float t = 2-tmp;
return (float3)( t, 1, 0);
} else if (tmp<3) {
float s = tmp-2;
float t = 3-tmp;
return (float3)(0, 1,s);
} else if (tmp<4) {
float s = tmp-3;
float t = 4-tmp;
return (float3)(0, t,1);
} else if (tmp<5) {
float s = tmp-4;
float t = 5-tmp;
return (float3)(s, 0,1);
} else {
float s = tmp-5;
float t = 6-tmp;
return (float3)(1, 0,t);
}
return (float3)(1,0,0);
}
__kernel void apply_color_map(__global uint*counts, __global uint8 *rgbs, int max_iter)
{
int idx = get_global_id(0);
int base = idx*3;
//printf("%d %d %d\\n", idx, base, counts[idx]);
if ( counts[idx] >= max_iter) {
rgbs[base] = 0;
rgbs[base+1] = 0;
rgbs[base+2] = 0;
} else {
float3 rgb = 255.9f*hue_to_rgb(counts[idx]/255.0f);
rgb.x = 0x80; // yeah, even something as simple as this does not work
rgb.y = 0x0;
rgb.z = 0x0;
rgbs[base] = floor(rgb.x);
rgbs[base+1] = floor(rgb.y);
rgbs[base+2] = floor(rgb.z);
//printf(" %v3f\\n", rgb);
//printf("counts[%d] = %d ; %v3f <%x,%u,%u>\\n", idx, counts[idx], rgb, rgbs[base], rgbs[base+1], rgbs[base+2]);
}
}
"""
def mission1(ctx, queue):
source = generate_kernel(ctx)
print (source)
prg = cl.Program(ctx, source).build()
width=256
height=16
#counts = numpy.load("/tmp/mandelbrot.npy");
counts = [ [x for x in range(width)] for y in range(height) ]
counts = numpy.array(counts, dtype=numpy.uint32)
print(counts)
kernel = prg.apply_color_map
kernel.set_scalar_arg_dtypes([None, None, numpy.uint32])
counts_len = width*height
counts_g = cl.Buffer(ctx, cl.mem_flags.READ_ONLY, counts_len*4)
rgb_g = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, counts_len*3)
rgb = numpy.zeros([height,width,3], dtype=numpy.uint8)
cl.enqueue_copy(queue, counts_g, counts).wait()
kernel(queue, [width*height], None, counts_g, rgb_g, 256) .wait()
cl.enqueue_copy(queue, rgb, rgb_g).wait()
imsave("/tmp/ramp.png", rgb)
#
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mission1(ctx, queue)
我希望输出图像看起来像是深红色(因为我将彩虹值替换为调试测试),但它看起来像是一些奇怪的条纹和噪音块。
答案 0 :(得分:0)
显然我需要改变
__kernel void apply_color_map(__global uint*counts, __global uint8 *rgbs, int max_iter)
到
__kernel void apply_color_map(__global uint*counts, __global unsigned char *rgbs, int max_iter)
如果有人可以指出解释为什么unsigned char
和uint8
与pyopencl不同的文档,我将接受他们的回答。