我正在尝试编写一个pyopencl脚本来计算Thomas的循环对称吸引子。 的功能是
x'= sin(y)-bx
y'= sin(z)-
z'= sin(x)-bz
我已经用python3编写了一个可以运行的实现,尽管运行缓慢。这是我想要的输出:
这是我的pyopencl实现的输出:
我相信我在正弦函数上遇到某种舍入误差或近似误差,因此我尝试将所有内容强制转换为双精度,但没有成功。我看到的另一种可能性是,在函数迭代时输出达到的值时出现了一些错误,但是我不知道会是什么。
这里是有问题的内核。
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
__kernel void thomas(__global float3 *a,
__global float3 *output, ulong const maxiter, float const stepSize, float const b )
{
int gid = get_global_id(0);
double x = a[gid][0];
double y = a[gid][1];
double z = a[gid][2];
double x1,y1,z1 = 0.0;
for(int citer = 0; citer<maxiter;citer++){
x1 = x+stepSize*(sin(y)-b*x);
y1 = y+stepSize*(sin(z)-b*y);
z1 = z+stepSize*(sin(x)-b*z);
output[gid*maxiter+citer][0]=x1;
output[gid*maxiter+citer][1]=y1;
output[gid*maxiter+citer][2]=z1;
x = x1;
y = y1;
z = z1;
}
}
a是一个起始值数组,而输出是一个长度为a * maxiter的数组
我希望pyopencl实现的输出与python3实现相匹配,但似乎只在xy平面中输出一个形状,该形状与3d形状的关系对我来说是不确定的。
编辑:这是违规程序的其余代码
import numpy as np
import pyopencl as cl
import open3d as o3d
def calc_thomas_opencl(npoints, stepSize, maxiter, b):
ballRadius = .5
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags
points = []
for point in range(npoints):
x1 = np.random.rand()-.5
x2 = np.random.rand()-.5
x3 = np.random.rand()-.5
u = np.random.rand()
fac = ballRadius*u**.3/(np.sqrt(x1**2+x2**2+x3**2))
point = (x1*fac,x2*fac,x3*fac)
points.append(point)
a=np.array(points,dtype = np.float32)
print(a[0])
a_opencl = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
output = np.zeros([npoints*maxiter,3])
output_opencl = cl.Buffer(ctx, mf.WRITE_ONLY, output.nbytes)
prg = cl.Program(ctx, """
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
__kernel void thomas(__global float3 *a,
__global float3 *output, ulong const maxiter, float const stepSize, float const b )
{
int gid = get_global_id(0);
double x = a[gid][0];
double y = a[gid][1];
double z = a[gid][2];
double x1,y1,z1 = 0.0;
for(int citer = 0; citer<maxiter;citer++){
x1 = x+stepSize*(sin(y)-b*x);
y1 = y+stepSize*(sin(z)-b*y);
z1 = z+stepSize*(sin(x)-b*z);
output[gid*maxiter+citer][0]=x1;
output[gid*maxiter+citer][1]=y1;
output[gid*maxiter+citer][2]=z1;
x = x1;
y = y1;
z = z1;
}
}
""").build()
prg.thomas(queue, (npoints,), None, a_opencl,
output_opencl, np.uint64(maxiter), np.float32(stepSize), np.float32(b))
cl.enqueue_copy(queue, output, output_opencl).wait()
return output
xyz = calc_thomas_opencl(1000,.05,1000,.2)
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(xyz)
o3d.visualization.draw_geometries([pcd])
答案 0 :(得分:0)
问题出在
output = np.zeros([npoints*maxiter,3])
它必须是
output = np.zeros([npoints*maxiter,3], dtype = np.float32)