PyOpenCL如何在内核函数内本地修改矩阵

时间:2017-10-28 20:21:44

标签: python-3.x for-loop matrix pyopencl

我试图在pyOpenCL内核函数内局部修改矩阵(Pbis),当用0填充此矩阵时,它会改变结果矩阵R.执行此代码时,我们在R矩阵中获得奇怪的值。这可能是由于内存分配,但我们无法弄清楚如何解决它。通常,R应该仅由init值组成。

program = cl.Program(context, """
__kernel void generate_paths(__global float *P, ushort const n,
    ushort N, ushort init, __global float *R){
  int i = get_global_id(0);
  __private float* Pbis;
  for (int k=0; k<n; k++){
      Pbis[k] = 0;
  }
  for (int j=0; j<n; j++)
  {
      R[i*(n+1) + j] = init;

  }
  R[i*(n+1) + n] = init;
  }
""").build()

生成的参数是:

program.generate_paths(queue, res_np.shape, None, P_buf, np.uint16(n), np.uint16(N), np.uint16(init), res_buf)

以下是重现性的完整代码:

import numpy as np
import pyopencl as cl
import numpy.linalg as la
import os
os.environ['PYOPENCL_COMPILER_OUTPUT'] = '1'
os.environ['PYOPENCL_CTX'] = '1'
(n, N) = (3,6)
U = np.random.uniform(0,1, size=(n+1)*N)
U = U.astype(np.float32)
P = np.matrix([[0, 1/3, 1/3, 1/3], [1/3, 0, 1/3, 1/3], [1/3, 1/3, 0, 1/3], [1/3, 1/3, 1/3, 0]])
P = P.astype(np.float32)
res_np = np.zeros((N, n+1),dtype = np.float32)


platform = cl.get_platforms()[0]
device = platform.get_devices()[0]
context = cl.Context([device])
queue = cl.CommandQueue(context)
mf = cl.mem_flags


U_buf = cl.Buffer(context, mf.COPY_HOST_PTR | mf.COPY_HOST_PTR, hostbuf=U)
P_buf = cl.Buffer(context, mf.COPY_HOST_PTR | mf.COPY_HOST_PTR, hostbuf=P)
res_buf = cl.Buffer(context, mf.WRITE_ONLY, res_np.nbytes)

init = 0

program = cl.Program(context, """
__kernel void generate_paths(__global const float *U, __global float *P, ushort const n,
    ushort N, ushort init, __global float *R){
  int i = get_global_id(0);
  int current = init;
  __private float* Pbis;
  for (int k=0; k<n; k++){
      Pbis[k] = 0;
  }
  for (int j=0; j<n; j++)
  {
      R[i*(n+1) + j] = current;

  }
  R[i*(n+1) + n] = init;
  }
""").build()


#prg.multiply(queue, c.shape, None,
#             np.uint16(n), np.uint16(m), np.uint16(p),
 #            a_buf, b_buf, c_buf)

# a_mul_b = np.empty_like(c)
# cl.enqueue_copy(queue, a_mul_b, c_buf)

program.generate_paths(queue, res_np.shape, None, U_buf, P_buf, np.uint16(n), np.uint16(N), np.uint16(init), res_buf)
chem_gen = np.empty_like(res_np)
cl.enqueue_copy(queue, chem_gen, res_buf)
print("Platform Selected = %s" %platform.name)
print("Device Selected = %s" %device.name)
print("Generated Paths:")
print (chem_gen)

0 个答案:

没有答案