Question

我正在尝试扫描 n 个对象的可迭代属性。我想找到一种 pythonic 的方法：把函数传给第一层循环的方法调用，从而在任意深度的嵌套循环中执行函数。当深度为 3 时，我始终只能让最内层的循环运行起来。下面是一段无法工作的 Python 伪代码，我想在循环的每一层查询不同的值。另一个难点是，我还想捕获每一层的输出，并把它传给外面一层循环。

class Parent(object):
    """Base scanner: holds an iterable and a same-sized list of results.

    ``iterable`` starts as two ``None`` placeholders and ``result`` starts
    as an independent copy of it.
    """

    def __init__(self):
        self.iterable = [None, None]
        self.result = list(self.iterable)

    def loop(self, query_func):
        """Return a zero-argument callable performing one pass over ``iterable``.

        Each time the returned callable is invoked it applies ``query_func``
        to every element of ``self.iterable`` in order, stores each answer at
        the matching index of ``self.result``, and returns a copy of
        ``self.result``. Nothing is executed until the callable is invoked.
        """
        def innerloop():
            for slot, item in enumerate(self.iterable):
                self.result[slot] = query_func(item)
            # Hand back a copy so callers cannot mutate the stored results.
            snapshot = self.result[:]
            return snapshot
        return innerloop
class ChildA(Parent):
    """Scanner over ``range(A)`` that queries ``object_to_queryA``."""

    # The constructor was originally spelled ``__init___`` (three trailing
    # underscores), so Python never used it and ``ChildA(A, obj)`` could not
    # be constructed with arguments.
    def __init__(self, A, object_to_queryA):
        """Store the values ``0 .. A-1`` and the object to query.

        ``A`` is the number of values to scan; ``object_to_queryA`` must
        provide ``some_query_function(x)``.
        """
        # Keep the attributes the inherited ``loop`` reads (``iterable`` and
        # ``result``); spelled in the Python 2.7 compatible form.
        super(ChildA, self).__init__()
        self.iterableA = list(range(A))
        self.resultA = self.iterableA[:]
        self.object_to_query = object_to_queryA

    def query_valueA(self, x):
        """Return ``self.object_to_query.some_query_function(x)``."""
        return self.object_to_query.some_query_function(x)
class ChildB(Parent):
    """Scanner over ``range(B)`` that queries ``object_to_queryB``."""

    # The constructor was originally spelled ``__init___`` (three trailing
    # underscores) and contained an unbalanced ``)`` that made the class a
    # syntax error.
    def __init__(self, B, object_to_queryB):
        """Store the values ``0 .. B-1`` and the object to query.

        ``B`` is the number of values to scan; ``object_to_queryB`` must
        provide ``some_other_query_function(x)``.
        """
        # Keep the attributes the inherited ``loop`` reads (``iterable`` and
        # ``result``); spelled in the Python 2.7 compatible form.
        super(ChildB, self).__init__()
        self.iterableB = list(range(B))
        self.resultB = self.iterableB[:]
        self.object_to_query = object_to_queryB

    def query_valueB(self, x):
        """Return ``self.object_to_query.some_other_query_function(x)``."""
        return self.object_to_query.some_other_query_function(x)
class ChildC(Parent):
    """Scanner over ``range(C)`` that queries ``object_to_queryC``."""

    # The constructor was originally spelled ``__init___`` (three trailing
    # underscores); its signature had a stray comma and its body an
    # unbalanced ``)``, both syntax errors.
    def __init__(self, C, object_to_queryC):
        """Store the values ``0 .. C-1`` and the object to query.

        ``C`` is the number of values to scan; ``object_to_queryC`` must
        provide ``yet_another_query_function(x)``.
        """
        # Keep the attributes the inherited ``loop`` reads (``iterable`` and
        # ``result``); spelled in the Python 2.7 compatible form.
        super(ChildC, self).__init__()
        self.iterableC = list(range(C))
        self.resultC = self.iterableC[:]
        self.object_to_query = object_to_queryC

    def query_valueC(self, x):
        """Return ``self.object_to_query.yet_another_query_function(x)``."""
        return self.object_to_query.yet_another_query_function(x)

我希望能够按如下方式调用这些循环：

import numpy
# Sketch of the call style the question is asking for; it is not working code.
# SomeObjA/SomeObjB/SomeObjC are not defined in this snippet; each is only
# required here to expose a sized ``.data`` attribute.
query_objA, query_objB, query_objC = (SomeObjA(), SomeObjB(), SomeObjC())
# One array dimension per queried object.
A, B, C = (len(query_objA.data), len(query_objB.data), len(query_objC.data))
instA = ChildA(A, query_objA)
instB = ChildB(B, query_objB)
instC = ChildC(C, query_objC)
# Nest the loops by passing each level's ``loop`` to the level outside it.
# NOTE(review): ``loop`` is referenced on the classes here, not on
# instA/instB/instC, which are unused by the lines below - presumably the
# instances were intended; confirm with the author.
my_scanning_func = ChildA.loop(ChildB.loop(ChildC.loop))
# Collected results are reshaped to one axis per loop level, outermost first.
my_queries = numpy.array(my_scanning_func()).reshape(A,B,C)
# The nesting order should be free to change; the axis order follows it.
my_scanning_func2 = ChildB.loop(ChildC.loop(ChildA.loop))
my_queries2 = numpy.array(my_scanning_func2()).reshape(B,C,A)

我正在寻找的终极功能将类似于下面，但对于任意深度和顺序：

# Hand-written version of the target behaviour for a fixed depth of three:
# each level enumerates its own ``.data`` and stores that level's query result
# under the index path reached so far.
# NOTE(review): ``response`` is not defined in this snippet, and each level
# assigns response[i] (or response[i][j]) and then indexes into it, which
# presumes the stored value is itself indexable - treat as pseudo-code.
for i, x in enumerate(query_objA.data):
    response[i] = instA.some_query_function(x)
    for j, y in enumerate(query_objB.data):
        response[i][j] = instB.some_other_query_function(y)
        for k, z in enumerate(query_objC.data):
            response[i][j][k] = instC.yet_another_query_function(z)

如果可以通过继承的递归函数完成（正如我上面尝试的那样），而不是为每个子类定义单独的循环方法，那就更好了（加分项）。最后注意：我正在尝试编写与 Python 2.7 兼容的代码。提前谢谢！

Answer 1

我不确定这是否能回答你的问题，但我认为它至少是相关的。如果你想生成一个 numpy 数组，使得 array[tup] = func(tup)（其中 tup 是由整数索引组成的元组），可以将 itertools.product 与 numpy.fromiter 结合使用，如下所示：

import itertools
#from itertools import imap as map #for python 2
import numpy

def array_from_func(dimensions, func, dtype=float):
    """Return an array ``a`` of shape ``dimensions`` with ``a[idx] == func(idx)``.

    ``dimensions`` is an iterable of non-negative ints, one per axis.
    ``func`` is called once for every index tuple, in the order produced by
    ``itertools.product`` (last index varies fastest), and its return value
    is stored at that index. ``dtype`` is passed through to
    ``numpy.fromiter``.
    """
    # Materialise once so any iterable is accepted and can be reused for
    # the final reshape.
    dimensions = tuple(dimensions)
    index_ranges = [range(extent) for extent in dimensions]
    # Visits every location regardless of the number of dimensions.
    all_indices = itertools.product(*index_ranges)
    # A generator expression stays lazy on both Python 2.7 and Python 3
    # (the builtin ``map`` builds a full list on 2.7).
    values = (func(index) for index in all_indices)
    flat = numpy.fromiter(values, dtype=dtype)
    # ``reshape`` returns a view of the freshly built 1-D array (no copy) and
    # is NumPy's preferred alternative to assigning ``.shape`` in place.
    return flat.reshape(dimensions)

这有助于我弄清索引与实际数组输出之间的关系。下面用三个示例演示基本功能（第二个是我最近才想到的）：

from operator import itemgetter
>>> array_from_func((2,3,4), itemgetter(1),int) #second index
array([[[0, 0, 0, 0],
        [1, 1, 1, 1],
        [2, 2, 2, 2]],

       [[0, 0, 0, 0],
        [1, 1, 1, 1],
        [2, 2, 2, 2]]])

>>> def str_join(it):
        return ",".join(map(str,it))
#the '<U5' in next line specifies strings of length 5, this only works when the string will actually be length 5
#changing to '<U%d'%len(str_join(dims)) would be more generalized but harder to understand
>>> print(array_from_func((3,2,7), str_join, '<U5')) 
[[['0,0,0' '0,0,1' '0,0,2' '0,0,3' '0,0,4' '0,0,5' '0,0,6']
  ['0,1,0' '0,1,1' '0,1,2' '0,1,3' '0,1,4' '0,1,5' '0,1,6']]

 [['1,0,0' '1,0,1' '1,0,2' '1,0,3' '1,0,4' '1,0,5' '1,0,6']
  ['1,1,0' '1,1,1' '1,1,2' '1,1,3' '1,1,4' '1,1,5' '1,1,6']]

 [['2,0,0' '2,0,1' '2,0,2' '2,0,3' '2,0,4' '2,0,5' '2,0,6']
  ['2,1,0' '2,1,1' '2,1,2' '2,1,3' '2,1,4' '2,1,5' '2,1,6']]]

>>> array_from_func((3,4), sum) #the sum of the indices, not as useful but another good demo
array([[ 0.,  1.,  2.,  3.],
       [ 1.,  2.,  3.,  4.],
       [ 2.,  3.,  4.,  5.]])

我认为这与你想要完成的事情有关，但我不太确定……如果需要我更有针对性地回答你的需求，请给我反馈。

Answer 2

经过与提问者（OP）的多次讨论，我对如何推广这些数组的构造有了更好的了解。首先，看起来你的对象可以设计成既能迭代预定义的状态，也能查询当前状态（可能其中只有一种有效），因此对象的接口（interface）可以抽象成如下形式：

class Apparatus_interface(object):
    """Sketch of a wrapper around one physical device.

    Iterating the object steps the device through its states; ``query``
    reads the device. Derives from ``object`` so it is a new-style class on
    Python 2.7 as well.
    """

    def __init__(self, *needed_stuff):
        # Placeholder: how the device is actually set up is not known here.
        self._device = SET_UP_OBJECT(needed_stuff)

        # When iterating over this object we need to know how many states
        # there are, so the shape (dimensions) of the arrays can be
        # predefined.
        self.num_of_states = 5

        # Type of the value that .query() returns, following the spec of
        # numpy's dtype.
        self.query_type = [('f1', float), ('f2', float)]

    def __iter__(self):
        """Iterate over the physical positions/states of the apparatus.

        The device is moved to a position just before that position is
        yielded, so a given state is only active in between iterations.

        * Calling list(device) doesn't give you any useful information, just
          a lot of mechanical work.
        """
        # NOTE(review): the right iterable of positions is device-specific;
        # range(num_of_states) is a stand-in.
        for position in range(self.num_of_states):
            self._device.move_to(position)  # represents a physical change in the device
            yield position  # open question: should it generate different information?

    def query(self):
        """Return the result of the underlying device's ``query()``."""
        return self._device.query()

使用此接口，您可以通过在多个设备上迭代（嵌套循环）来生成数组，并在它们之间的每个状态组合中查询另一个设备的状态（并将该值记录到数组中）

通常，您可以使用 itertools.product 生成设备状态的组合；但是出于优化，itertools.product 会在组合真正被迭代使用之前，就先把那些会影响物理设备的迭代代码全部运行一遍，所以你需要一个不做这种优化的实现：

def _product(iterables, depth, values):
    """Recursive worker for ``product``.

    ``values`` is a shared list holding the elements chosen so far;
    ``values[depth]`` is set to each element of ``iterables[depth]`` in turn
    before descending one level. Once every level has a value, a tuple
    snapshot of ``values`` is yielded.
    """
    if depth == len(iterables):
        # Every level is filled in. This also covers the zero-iterables case,
        # which yields a single empty tuple like itertools.product().
        yield tuple(values)
        return
    for element in iterables[depth]:
        values[depth] = element
        # Equivalent to ``yield from``; spelled out for Python 2.7.
        for combination in _product(iterables, depth + 1, values):
            yield combination


def product(*iterables):
    """
    version of itertools.product to activate side-effects of iteration

    Each level is iterated lazily, in nested-loop order: nothing is consumed
    until the result is iterated, and every inner iterable is iterated again
    for each element of the level outside it.
    Only works with iterables, not iterators, because the inner levels must
    be re-iterable.
    """
    values = [None] * len(iterables)
    return _product(iterables, 0, values)

现在来实际生成数组。首先是一个遍历所有状态的笛卡尔积、并在每个状态组合下进行一次查询的过程。请注意 states 变量并未被使用，因为我假设值在 numpy 数组中的位置由状态迭代的顺序决定，而不是由迭代产生的值决定：

def traverse_states(variable_devices, queried_device):
    """Yield ``queried_device.query()`` once per combination of device states.

    The combinations come from ``product(*variable_devices)``; the combination
    itself is discarded, so only the order of iteration determines which
    reading is produced when.
    """
    for _combination in product(*variable_devices):
        reading = queried_device.query()
        yield reading

然后，把数组组装起来的函数就相当直接了：

def array_from_apparatus(variable_devices, queried_object, dtype=None):
    """Query ``queried_object`` at every state combination and return an array.

    ``variable_devices`` is a re-iterable collection of devices, each exposing
    ``num_of_states``; the result has one axis per device, in the given order.
    ``dtype`` defaults to ``queried_object.query_type`` when it is None.
    """
    # The number of states in each device <==> the number of elements in each
    # dimension.
    arr_shape = tuple(device.num_of_states for device in variable_devices)

    if dtype is None:
        dtype = queried_object.query_type

    readings = traverse_states(variable_devices, queried_object)
    flat = numpy.fromiter(readings, dtype=dtype)
    # This fails if .num_of_states doesn't match the actual number of
    # iterations. ``reshape`` is NumPy's preferred alternative to assigning
    # ``.shape`` in place and returns a view of the fresh 1-D array.
    return flat.reshape(arr_shape)

我不确定如何对此进行合理的测试，但我相信它会起作用或至少接近。

通过传递方法对象将循环嵌套到任意深度

2 个答案: