Question

我想通过将一些函数转换为Cython来加速一段非常简单的Python代码。但是，在循环体内，我需要找到数组的最小值和最大值，这似乎是瓶颈所在。根据Cython生成的.html注释文件，这些行被翻译成了大量的C代码。为什么？

这是完整的代码，下面我列出了让我头疼的行：

import numpy as np
cimport numpy as np
cimport cython
from cython cimport boundscheck, wraparound


@boundscheck(False)
@wraparound(False)
cdef box_overlaps_contour(unsigned int[:] boxTopLeftXY, unsigned int boxSize, unsigned int[:, :, :] contourData):
    """Report whether a contour's extent falls inside a square box grown by half that extent.

    Parameters:
        boxTopLeftXY: 1-D view; element 0 is used as the box's x origin and
            element 1 as its y origin.
        boxSize: edge length added to each origin to get the far box edge.
        contourData: 3-D view of contour points; only ``contourData[:, 0, 1]``
            is read.

    Returns:
        The ``bint`` flag. No C return type is declared on this function, so
        the value is handed back as a Python object.
    """
    cdef bint isOverlapping = False
    cdef unsigned int xmin, xmax, width, boxXmin, boxXmax, ymin, ymax, height, boxYmin, boxYmax

    # min()/max() here are the Python builtins applied to a memoryview slice.
    # NOTE(review): the x extent and the y extent below are both read from
    # contourData[:, 0, 1]; presumably one of them should read column 0 --
    # confirm against the contour layout before changing.
    xmin = min(contourData[:, 0, 1])
    xmax = max(contourData[:, 0, 1])
    width = xmax - xmin
    boxXmin = boxTopLeftXY[0]
    boxXmax = boxTopLeftXY[0] + boxSize

    # NOTE(review): the operands are unsigned; verify that boxXmin - width/2
    # cannot wrap around when width/2 exceeds boxXmin.
    if xmin > (boxXmin-width/2):
        if xmax < (boxXmax+width/2):
            # The y extent is only computed once the x test has passed.
            ymin = min(contourData[:, 0, 1])
            ymax = max(contourData[:, 0, 1])
            height = ymax - ymin
            boxYmin = boxTopLeftXY[1]
            boxYmax = boxTopLeftXY[1] + boxSize

            if ymin > (boxYmin-height/2):
                # Fixed: the y upper bound uses height/2 (it previously used
                # width/2), matching the lower-bound test just above.
                if ymax < (boxYmax+height/2):
                    isOverlapping = True

    return isOverlapping


@boundscheck(False)
@wraparound(False)
def def_get_indices_of_overlapping_particles(contours not None, unsigned int[:, :] topLefts, unsigned int boxSize):
    """Collect the indices of contours for which at least one box tests as overlapping.

    Parameters:
        contours: indexable, sized collection (must not be None); each item is
            assigned to a 3-D ``unsigned int`` memoryview.
        topLefts: 2-D view; each row ``topLefts[j, :]`` is passed to
            ``box_overlaps_contour`` as the box's top-left coordinates.
        boxSize: box edge length, forwarded unchanged to ``box_overlaps_contour``.

    Returns:
        A memoryview slice holding the first ``counter`` entries of the index
        buffer, i.e. the indices ``i`` that were recorded, in ascending order.
    """
    cdef Py_ssize_t i, j
    cdef unsigned int counter, numParticles, numTopLefts
    numParticles = len(contours)
    numTopLefts = topLefts.shape[0]
    # Buffer sized for the worst case where every contour is recorded.
    cdef unsigned int[:] overlappingIndices = np.zeros(numParticles, dtype=np.uint32)
    cdef unsigned int[:, :, :] currentContour

    counter = 0
    for i in range(numParticles):
        # Assigning to the typed memoryview acquires a buffer view of the item.
        currentContour = contours[i]
        for j in range(numTopLefts):
            if box_overlaps_contour(topLefts[j, :], boxSize, currentContour):
                overlappingIndices[counter] = i
                counter += 1
                # One matching box is enough; record each contour at most once.
                break

    # Only the filled prefix of the buffer is returned.
    return overlappingIndices[:counter]

该函数接收一个轮廓列表（每个轮廓是由cv2得到的np.ndarray）和一个表示若干xy坐标的数组，在这些坐标处放置了指定boxSize大小的矩形。该函数应该遍历所有轮廓，并返回与任一矩形框重叠的轮廓的索引。下面这些行似乎使整个过程变得极其缓慢（实际上，它比纯Python版本还要慢）：

+13:     xmin = min(contourData[:, 0, 1])
+14:     xmax = max(contourData[:, 0, 1])

同样，

+21:             ymin = min(contourData[:, 0, 1])
+22:             ymax = max(contourData[:, 0, 1])

还有一行也有问题（不过程度较轻），而我不明白其中的原因：

+48:             if box_overlaps_contour(topLefts[j, :], boxSize, currentContour):

为什么函数调用已经如此复杂？数据类型匹配，所有都是无符号整数。

返回bool值的那一行也是如此；下面我展开了编译器为它生成的代码：

+31:     return isOverlapping
  __Pyx_XDECREF(__pyx_r);
  __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_v_isOverlapping); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 31, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_r = __pyx_t_2;
  __pyx_t_2 = 0;
  goto __pyx_L0;

任何帮助都将不胜感激！我似乎仍然不太了解Cython的工作原理 :/ 如果需要，我可以提供更多信息！

非常感谢！！！：）

编辑：这是Cython为np.min()这一行生成的代码……有什么想法吗？

+21:             ymin = np.min(contourData[:, 0, 1])
      __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_np); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 21, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_2);
      __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_min); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 21, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
      __pyx_t_4.data = __pyx_v_contourData.data;
      __pyx_t_4.memview = __pyx_v_contourData.memview;
      __PYX_INC_MEMVIEW(&__pyx_t_4, 0);
      __pyx_t_4.shape[0] = __pyx_v_contourData.shape[0];
__pyx_t_4.strides[0] = __pyx_v_contourData.strides[0];
    __pyx_t_4.suboffsets[0] = -1;

{
    Py_ssize_t __pyx_tmp_idx = 0;
    Py_ssize_t __pyx_tmp_stride = __pyx_v_contourData.strides[1];
        if ((0)) __PYX_ERR(0, 21, __pyx_L1_error)
        __pyx_t_4.data += __pyx_tmp_idx * __pyx_tmp_stride;
}

{
    Py_ssize_t __pyx_tmp_idx = 1;
    Py_ssize_t __pyx_tmp_stride = __pyx_v_contourData.strides[2];
        if ((0)) __PYX_ERR(0, 21, __pyx_L1_error)
        __pyx_t_4.data += __pyx_tmp_idx * __pyx_tmp_stride;
}

__pyx_t_2 = __pyx_memoryview_fromslice(__pyx_t_4, 1, (PyObject *(*)(char *)) __pyx_memview_get_unsigned_int, (int (*)(char *, PyObject *)) __pyx_memview_set_unsigned_int, 0);; if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 21, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_2);
      __PYX_XDEC_MEMVIEW(&__pyx_t_4, 1);
      __pyx_t_4.memview = NULL;
      __pyx_t_4.data = NULL;
      __pyx_t_5 = NULL;
      if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_3))) {
        __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_3);
        if (likely(__pyx_t_5)) {
          PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
          __Pyx_INCREF(__pyx_t_5);
          __Pyx_INCREF(function);
          __Pyx_DECREF_SET(__pyx_t_3, function);
        }
      }
      __pyx_t_1 = (__pyx_t_5) ? __Pyx_PyObject_Call2Args(__pyx_t_3, __pyx_t_5, __pyx_t_2) : __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_2);
      __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
      if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 21, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_1);
      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
      __pyx_t_6 = __Pyx_PyInt_As_unsigned_int(__pyx_t_1); if (unlikely((__pyx_t_6 == (unsigned int)-1) && PyErr_Occurred())) __PYX_ERR(0, 21, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
      __pyx_v_ymin = __pyx_t_6;

Answer 1

使用np.min和np.max可能比Python的min和max函数更快（可能取决于数组的大小）。NumPy函数会使用C缓冲区协议并直接对C数值类型进行操作，而Python函数会使用Python迭代器协议并把数字当作Python对象处理。尽管如此，它们在Cython的注释输出中看起来仍然一样黄。

编辑：如果这样做没有帮助，则可能需要自己编写一个cdef函数来同时计算最小值和最大值（以避免Python调用）。大致如下（以下是未经调试的代码……）

# Return type is a C struct of 2 values (a ctuple) - this should be quick...
cdef (double, double) minmax(double[:] arr):
    """Return ``(minimum, maximum)`` of a 1-D double memoryview in one pass.

    Parameters:
        arr: 1-D typed memoryview of doubles.

    Returns:
        A C tuple ``(lowest, highest)``. For a zero-length view the loop never
        runs and the initial values ``(inf, -inf)`` are returned.
    """
    cdef double lowest = np.inf
    cdef double highest = -np.inf
    cdef Py_ssize_t i
    for i in range(arr.shape[0]):
        # Two independent tests rather than if/elif: the first element must be
        # able to update both bounds.
        if arr[i] < lowest:
            lowest = arr[i]
        if arr[i] > highest:
            highest = arr[i]
    return lowest, highest

这样做的好处是只需一次循环就能同时得到最小值和最大值，并且不需要Python函数调用。显然，它的缺点是您需要自己编写。

您看到的许多生成的C代码与memoryview切片有关，并且实际上并不太慢（尽管它占用了大量空间）。

cdef box_overlaps_contour(unsigned int[:] boxTopLeftXY, unsigned int boxSize, unsigned int[:, :, :] contourData):

未指定返回类型，因此它以Python对象的形式返回。您可以写成cdef bint box_overlaps_contour(...)，使其返回“布尔整数”。

数组上的Cython最小值和最大值

1 个答案: