Question

我正在尝试编写处理Numpy ndarray的代码，但这些数组可能属于不同的类型。我遇到的问题是，我不知道如何让Cython快速处理所有输入/输出类型的组合。下面是我想要实现的MWE（当然，它无法编译，但它表达了我想要实现的目标）：

general_types.pyx：

import numpy as np
cimport numpy as np
cimport cython
from cython cimport integral
from libcpp.vector cimport vector
from numpy cimport uint64_t

cdef extern from "<cstring>" namespace "std":  # make C++ std::memcpy callable from Cython
    void* memcpy (void* destination, const void* source, size_t num)

def cywhere(np.ndarray a not None, np.ndarray b not None):  # Python-facing entry point; None is rejected for a and b
    a = np.asarray(a, order='C')  # make sure both inputs are C-contiguous arrays
    b = np.asarray(b, order='C')

    a_ind, b_ind = cywhere_wrapped[cython.typeof(a), cython.typeof(b)](a, b)  # NOTE: Cython rejects this; a fused function can only be indexed with types

    a_ind = a_ind.astype(np.min_scalar_type(a.shape[0]))  # shrink each result to the smallest dtype able to hold the input length
    b_ind = b_ind.astype(np.min_scalar_type(b.shape[0]))

    return a_ind, b_ind

@cython.boundscheck(False)
@cython.wraparound(False)
cdef cywhere_wrapped(np.ndarray[integral] a, np.ndarray[integral] b):  # typed core; the single fused type `integral` is shared by a, b and both outputs
    cdef vector[integral] a_ind = vector[integral]()  # index buffers; nothing in this example ever fills them
    cdef vector[integral] b_ind = vector[integral]()
    cdef uint64_t na = a.shape[0], nb = b.shape[0]  # input lengths (not used further in this example)

    return vector_to_np(a_ind), vector_to_np(b_ind)  # hand both buffers back as NumPy arrays

@cython.boundscheck(False)
@cython.wraparound(False)
cdef vector_to_np(vector[integral]& vec):  # copy the contents of a C++ vector into a new NumPy array
    cdef np.ndarray[integral] arr = np.empty(vec.size(), dtype=integral)  # NOTE: `integral` is a compile-time fused type, not a NumPy dtype object

    cdef void* arr_p = <void*> arr.data  # destination: the array's raw data buffer
    cdef void* vec_p = <void*> &vec[0]  # source: address of the vector's first element

    memcpy(arr_p, vec_p, sizeof(integral) * vec.size())  # raw byte copy of vec.size() elements

    return arr

setup.py：

#!/usr/bin/env python
from setuptools import setup, Extension
from Cython.Build import cythonize
import numpy as np

# Describe every .pyx file matched by the glob as a C++11 extension module.
cython_extensions = [
    Extension(
        '*',
        ['**/*.pyx'],
        language='c++',
        extra_compile_args=['-std=c++11'],
    ),
]

# Translate the .pyx sources; annotate=True also requests Cython's annotation output.
build_targets = cythonize(cython_extensions, annotate=True)

setup(
    include_dirs=[np.get_include()],
    ext_modules=build_targets,
)

基本上，我希望能够处理任何输入/输出类型，而无需在把数组传递给cywhere之前转换数组的类型。a和b可以是任何整数类型（彼此可以不同），输出a_ind和b_ind也可以是其他完全不同的整数类型。这有几个障碍：

自动为cywhere_wrapped选择正确的重载。
针对np.min_scalar_type(a.shape[0])获取两个输出的正确类型，以避免分配额外的内存。
自动为vector_to_np选择正确的重载。
sizeof(integral)应该被正确求值。

有没有办法通过一个代码库实现这一目标？如果能让它变得更简单，我愿意接受C ++。

编辑：编译错误：

general_types.pyx:15:34: Can only index fused functions with types
general_types.pyx:29:24: Cannot coerce to a type that is not specialized
general_types.pyx:29:45: Cannot coerce to a type that is not specialized
general_types.pyx:34:44: Cannot coerce to a type that is not specialized
general_types.pyx:37:31: Type is not specialized
general_types.pyx:41:11: Type is not specialized
general_types.pyx:15:19: Invalid use of fused types, type cannot be specialized
general_types.pyx:34:9: Invalid use of fused types, type cannot be specialized
general_types.pyx:36:31: Invalid use of fused types, type cannot be specialized

Answer 1

这里的主要困难是你使用两种类型：

numpy dtype，它是运行时确定的Python对象，
由Cython融合类型指定的类型，它是在编译时确定的C类型。

在这两种类型之间进行转换并不是特别容易（无法进行“dtype -> 融合类型”的转换是有道理的，因为这需要用运行时的信息去生成编译时就需要的东西；但反方向也无法转换就不太说得通了）。为了解决这个问题，我使用了伪参数——这些参数本身不会被使用，但会强制Cython为特定类型生成代码。

第二个问题是您希望a和b数组是不同的类型（我最初误读了您的评论，以为您想要相同的类型……）。为此，您只需要复制一份融合类型的声明（duplicate the fused type declaration；请参阅关于Cython > 0.20.x中所做更改的说明）。这也意味着您必须自己定义integral——我还在其中添加了int8_t。一个后果是，最终每个函数都会被定义出4*4*4*4个版本，这需要一段时间来编译，会生成一个很大的模块，并且在出错时每条错误消息都会出现4*4*4*4份。

我做了以下更改：

从numpy数组切换到类型化的内存视图 - 我不是100％肯定，但我认为numpy数组不适用于融合类型
创建了两个新的融合类型uint_size_a/b。与内置的integral不同，它们包含无符号整数（对索引来说有意义），以及8位整数（对节省空间来说有意义）。它们用于表示输出的类型。
使内部函数（cywhere_internal）接受伪参数。这是一个数组参数（使用非数组参数不起作用，原因我并不完全清楚）。思路是：先计算出存储索引所需的dtype，用该dtype创建一个（空）数组，然后把这个数组传给cywhere_internal，以选出函数的正确版本。cywhere_internal必须是def而不是cdef，因为选择是在运行时进行的。
为vector_to_np添加了一个uint_size类型的伪参数。这有点像是一种hack，但如果没有这个伪参数，Cython似乎不会把它当作融合类型函数来处理。该参数会被忽略。
删除了大多数cython.boundscheck(False)和cython.wraparound(False)装饰器。您并没有做任何会让它们起作用的事情，所以我认为最好把它们去掉。
我把dtype参数逐层传递下去，因为vector_to_np函数需要用到它们。
其他一些小改变......

代码：

import numpy as np
cimport cython
from libcpp.vector cimport vector
from libc.stdint cimport (uint8_t, uint16_t, uint32_t, uint64_t,
                          int8_t, int16_t, int32_t, int64_t)

# Unsigned integer types available for the output index arrays.
# uint_size_a and uint_size_b list the same C types but are declared as two
# separate fused types, so the index type used for `a` and the one used for `b`
# are specialized independently of each other.
ctypedef fused uint_size_a:
    uint8_t
    uint16_t
    uint32_t
    uint64_t
ctypedef fused uint_size_b:
    uint8_t
    uint16_t
    uint32_t
    uint64_t

# Signed integer element types accepted for the two input arrays.
# As with the unsigned pair, integral_a and integral_b are declared separately
# so that `a` and `b` may have different element types in the same call.
ctypedef fused integral_a:
    int8_t
    int16_t
    int32_t
    int64_t
ctypedef fused integral_b:
    int8_t
    int16_t
    int32_t
    int64_t

# Declare std::memcpy from the C++ <cstring> header so Cython code can call it.
cdef extern from "<cstring>" namespace "std":
    void* memcpy (void* destination, const void* source, size_t num)

def cywhere(integral_a[::1] a not None, integral_b[::1] b not None):
    """Pick the index dtypes for `a` and `b` and forward to cywhere_internal.

    `a` and `b` are contiguous 1-D integer buffers whose element types may
    differ. For each input, the smallest NumPy scalar type able to hold its
    length is chosen as the dtype of the corresponding output.

    Returns whatever cywhere_internal returns for these arguments.
    """
    index_dtype_a = np.min_scalar_type(a.shape[0])
    index_dtype_b = np.min_scalar_type(b.shape[0])

    # Zero-length arrays that exist only to carry the chosen dtypes: passing
    # them lets the run-time fused-type dispatch of cywhere_internal select
    # the specialization matching those dtypes.
    selector_a = np.zeros((0,), dtype=index_dtype_a)
    selector_b = np.zeros((0,), dtype=index_dtype_b)

    return cywhere_internal(selector_a, selector_b,
                            index_dtype_a, index_dtype_b,
                            a, b)

def cywhere_internal(uint_size_a[:] dummya, uint_size_b[:] dummyb,
                     dtype_a, dtype_b,
                     integral_a[::1] a, integral_b[::1] b):
    """Typed core, specialized on both the input and the output index types.

    dummya / dummyb are never read; their element types alone select the
    uint_size_a / uint_size_b specialization. dtype_a / dtype_b are the
    NumPy dtypes handed on to vector_to_np for the two result arrays.

    Returns a tuple of two NumPy arrays, one per input.
    """
    # Index buffers, default-constructed empty. Nothing in this function
    # appends to them, so both results come back with length 0.
    cdef vector[uint_size_a] found_a
    cdef vector[uint_size_b] found_b
    # Scalars passed purely so that vector_to_np is specialized on the
    # right fused type; their value is irrelevant.
    cdef uint_size_a tag_a = 0
    cdef uint_size_b tag_b = 0

    return (vector_to_np(tag_a, dtype_a, found_a),
            vector_to_np(tag_b, dtype_b, found_b))

cdef vector_to_np(uint_size_a dummy, dtype, vector[uint_size_a]& vec):
    """Copy the contents of a C++ vector into a newly allocated NumPy array.

    dummy -- ignored; present only so this function is specialized on the
             fused element type.
    dtype -- NumPy dtype for the result. It is used to allocate the array
             that is then bound to a typed memoryview of the specialized
             element type, so it has to describe that same type.
    vec   -- source vector; it is not modified.

    Returns a 1-D NumPy array with vec.size() elements.
    """
    cdef size_t n = vec.size()
    cdef uint_size_a[::1] arr = np.empty(n, dtype=dtype)

    # An empty vector has no element 0, so `&vec[0]` would be undefined
    # behaviour, and `&arr[0]` would be out of bounds on the zero-length
    # result. There is nothing to copy in that case, so skip it entirely.
    if n > 0:
        memcpy(<void*> &arr[0], <void*> vec.data(), sizeof(uint_size_a) * n)

    return np.asarray(arr)

很难确定它是否能正常工作，因为它总是生成长度为0的数组。

我并不完全相信通过使用最小的索引类型来节省空间是值得的——在运行时选择cywhere_internal的正确版本会带来少许减速。只有在内存确实非常紧张时，我才会选择这样做。

使用Cython编程 - 使用共享代码的ndarray的常规类型

1 个答案: