用cython声明一个numpy数组会产生很多开销

时间:2016-05-24 08:27:08

标签: python numpy cython

我正在使用Cython重写我的一些Python代码。

遵循建议in the documentation我开始使用优化的cython定义替换我的python数组。

特别是,以下应该是声明numpy数组的“最佳”方式:

# cython: profile=True
# cython: boundscheck=False
# cython: wraparound=False

import numpy as np
cimport numpy as np

cpdef test():

    cdef np.ndarray[np.int_t, ndim=1] seeds_idx = np.empty(10, dtype=np.int)

    pass

但是,通过cython -a my_file.pyx分析上述代码生成的html文件显示如下:

+10:     cdef np.ndarray[np.int_t, ndim=1] seeds_idx = np.empty(10, dtype=np.int)
  __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_empty); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_int); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_4) < 0) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_tuple_, __pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 10, __pyx_L1_error)
  __pyx_t_5 = ((PyArrayObject *)__pyx_t_4);
  {
    __Pyx_BufFmt_StackElem __pyx_stack[1];
    if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer, (PyObject*)__pyx_t_5, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
      __pyx_v_seeds_idx = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer.buf = NULL;
      __PYX_ERR(0, 10, __pyx_L1_error)
    } else {__pyx_pybuffernd_seeds_idx.diminfo[0].strides = __pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_seeds_idx.diminfo[0].shape = __pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer.shape[0];
    }
  }
  __pyx_t_5 = 0;
  __pyx_v_seeds_idx = ((PyArrayObject *)__pyx_t_4);
  __pyx_t_4 = 0;
/* … */
  __pyx_tuple_ = PyTuple_Pack(1, __pyx_int_10); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple_);
  __Pyx_GIVEREF(__pyx_tuple_);

这是在Python 2.7上使用cython 0.24和numpy 1.10.4获得的。

另一方面,非常简单的声明seeds_idx = np.empty(10)导致:

+10:     seeds_idx = np.empty(10)
  __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_empty); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_v_seeds_idx = __pyx_t_1;
  __pyx_t_1 = 0;
/* … */
  __pyx_tuple_ = PyTuple_Pack(1, __pyx_int_10); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple_);
  __Pyx_GIVEREF(__pyx_tuple_);

这里出了什么问题(如果有的话)?谢谢!

1 个答案:

答案 0 :(得分:2)

正如评论所说,这里没有任何错误,所以不用担心。另外,请记住,您正在检查为简单分配生成的代码,任何差异都不会影响性能。

虽然在第二种情况下,seeds_idx = np.empty(10)应该更改为seeds_idx = np.empty(10, dtype=np.int)以匹配第一种情况。

如果添加,则还会添加为存储函数调用的参数(np.empty)而创建的字典:

__pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 8, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);

查找np.int

__pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_3);
__pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_int); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 10, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_4);
__Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

并在新创建的字典中设置参数:

if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_4) < 0) __PYX_ERR(0, 8, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;

除此之外,它们之间的唯一区别如下:

if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 10, __pyx_L1_error)
  __pyx_t_5 = ((PyArrayObject *)__pyx_t_4);
  {
    __Pyx_BufFmt_StackElem __pyx_stack[1];
    if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer, (PyObject*)__pyx_t_5, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
      __pyx_v_seeds_idx = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer.buf = NULL;
      __PYX_ERR(0, 10, __pyx_L1_error)
    } else {__pyx_pybuffernd_seeds_idx.diminfo[0].strides = __pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_seeds_idx.diminfo[0].shape = __pyx_pybuffernd_seeds_idx.rcbuffer->pybuffer.shape[0];
    }
  }

其中,as stated in the documentation you linked最有可能是为了快速访问数据缓冲区而执行的。

到目前为止,最好的选择是使用typed memoryviews。这些是本机方式,很可能是在cython中使用数组的最简单方法。 Their performance is usually on par with numpy arrays如果没有,你可以随时轻松切换它们。