Question

我试图使用逐次超松弛（SOR，successive over-relaxation）方法，求解给定边界条件下的势（potential）。

我有2个解决方案：

- 第一种：遍历所有元素并应用公式 field[y,x] = (1-alpha)*field[y,x] + (field[max(y-1,0),x] + field[min(y+1,field.shape[0]-1),x] + field[y,max(x-1,0)] + field[y,min(x+1,field.shape[1]-1)]) * alpha/4。这种方法很慢，因为它的内存访问方式效率不高。

- 第二种：我创建 4 个分别沿 4 个方向平移 1 格的矩阵，应用相同的公式，然后把这些矩阵相加。然而，这种做法没有考虑当前迭代中已经做出的修改。它比第一种快得多。

当 alpha = 1.9 时，第一个算法收敛，而第二个算法发散。对于 alpha = 1.0，两者都收敛，但非常缓慢。

谁能告诉我我做错了什么？我该如何修正这个快速的方案，使它也能收敛？

完整代码：

#! python3

import numpy
import math
import time

def solve_laplace(boundary, mask, file = None, alpha = 1.0, threshold = 0.0001):
    """
    Relax the Laplace equation on an N-dimensional grid, updating all cells at once.

    Every iteration builds the new field exclusively from the previous
    iteration's values (``prev``):

        V_new = (1 - alpha) * V_old
                + alpha * (sum of the 2*ndim axis neighbours of V_old) / (2*ndim)

    Neighbours that would fall outside the grid are replaced by the edge cell
    itself. Cells selected by ``mask`` are reset to ``boundary`` after every
    iteration.

    NOTE: because values computed during the current sweep are never reused,
    this is a simultaneous (Jacobi-style) update, not an in-place sweep. The
    test at the bottom of this file records that it diverges for alpha = 1.5.

    Parameters:
        boundary:  array holding the fixed values; its shape and dtype define the field.
        mask:      boolean array, True where the value of ``boundary`` is held fixed.
        file:      if not None, the result is written there with ``numpy.save``.
        alpha:     relaxation factor.
        threshold: stop once the RMS change of an iteration, relative to the RMS
                   of the field, drops below this value.

    Returns:
        The field after the last iteration performed (at most 10000 iterations).
    """

    dim = boundary.ndim

    def along(axis, selection):
        # Index tuple applying `selection` on `axis` and a full slice on every other axis.
        return tuple(selection if i == axis else slice(None) for i in range(dim))

    field = numpy.zeros_like(boundary)
    numpy.copyto(field, boundary, casting = "safe", where = mask)
    last_diff = float("infinity")

    for iter_nr in range(10000): # max number of iterations
        # Snapshot of the field; every neighbour value below is read from it.
        prev = field.copy()

        for d in range(dim): # works for any number of dimensions
            # prev shifted by +1 along d (first face duplicated): index x holds the x-1 neighbour
            front = numpy.concatenate((prev[along(d, slice(0, 1))], prev[along(d, slice(None, -1))]), d)
            # prev shifted by -1 along d (last face duplicated): index x holds the x+1 neighbour
            back = numpy.concatenate((prev[along(d, slice(1, None))], prev[along(d, slice(-1, None))]), d)

            # field still equals prev here, so this accumulates prev + alpha * neighbour average
            field += (front + back) * alpha/(2*dim)

        # prev + alpha*average - alpha*prev == (1 - alpha)*prev + alpha*average
        field -= alpha*prev
        # reset values at boundaries
        numpy.copyto(field, boundary, casting = "safe", where = mask)

        # RMS of the field and RMS of the change made by this iteration
        average = math.sqrt(numpy.average(field**2))
        diff = math.sqrt(numpy.average((field-prev)**2))
        # An identically zero field has no scale to normalise by; use the absolute change instead.
        relative_diff = diff/average if average else diff

        if last_diff < relative_diff:
            print("Solution is diverging.")
            break

        if relative_diff < threshold:
            print("Found solution after", iter_nr,"iteratiorn.")
            break

        last_diff = relative_diff

    if file is not None:
        numpy.save(file,field)
    return field



def solve_laplace_slow_2D(boundary, mask, file = None, alpha = 1.9,threshold = 0.0001):
    """
    Relax the Laplace equation on a 2D grid with an in-place, cell-by-cell sweep.

    Cells are visited in row-major order and each non-masked cell is replaced by

        V(y,x) = (1 - alpha) * V(y,x)
                 + alpha * (V(y-1,x) + V(y+1,x) + V(y,x-1) + V(y,x+1)) / 4

    reading the neighbours from the array being updated, so values already
    rewritten during the current sweep are used immediately. Neighbour indices
    are clamped to the grid, so an edge cell uses itself in place of a missing
    neighbour.

    Parameters:
        boundary:  2D array holding the fixed values; defines shape and dtype of the field.
        mask:      boolean array, True where the cell is held fixed and skipped by the sweep.
        file:      if not None, the result is written there with ``numpy.save``.
        alpha:     relaxation factor.
        threshold: stop once the RMS change of a sweep, relative to the RMS of
                   the field, drops below this value.

    Returns:
        The field after the last sweep performed (at most 10000 sweeps, and
        the loop also stops once more than 3600 seconds have elapsed).
    """

    assert boundary.ndim == 2

    field = numpy.zeros_like(boundary)
    numpy.copyto(field, boundary, casting = "safe", where = mask)
    last_diff = float("infinity")
    start_time = time.time()

    # Loop-invariant grid extents, used to clamp neighbour indices at the edges.
    rows, cols = field.shape
    last_row = rows - 1
    last_col = cols - 1

    for iter_nr in range(10000): # max number of iterations
        # Snapshot taken only to measure how much this sweep changed the field.
        prev = field.copy()
        for y in range(rows):
            for x in range(cols):
                if not mask[y,x]:
                    field[y,x] = (1-alpha)*field[y,x] + (field[max(y-1,0),x] + field[min(y+1,last_row),x] + field[y,max(x-1,0)] + field[y,min(x+1,last_col)]) * alpha/4

        # RMS of the field and RMS of the change made by this sweep
        average = math.sqrt(numpy.average(field**2))
        diff = math.sqrt(numpy.average((field-prev)**2))
        # An identically zero field has no scale to normalise by; use the absolute change instead.
        relative_diff = diff/average if average else diff

        if last_diff < relative_diff:
            print("Solution is diverging.")
            break

        if relative_diff < threshold:
            print("Found solution after the", iter_nr,"iteratiorn.")
            break

        # Wall-clock guard: give up after one hour.
        if time.time() - start_time > 3600:
            print("Completed in an hour time at iteration:", iter_nr)
            break

        last_diff = relative_diff

    if file is not None:
        numpy.save(file,field)
    return field

def test():
    """Run both solvers on a 51x51 grid: border held at -1, centre cell held at +1."""
    size = 51
    boundary = numpy.zeros((size, size))
    boundary[size // 2, size // 2] = 1
    # First/last row and first/last column form the outer frame.
    boundary[[0, -1], :] = -1
    boundary[:, [0, -1]] = -1
    # Every non-zero cell is a fixed (boundary) cell.
    mask = boundary != 0

    print("Trying fast method:")
    solve_laplace(boundary, mask, alpha = 1.5) # reported to diverge
    print("Trying slow method:")
    solve_laplace_slow_2D(boundary, mask, alpha = 1.5) # reported to converge, but very slowly

Answer 1

这有点取巧……如果你能把整个运算定义在单个 ufunc 中，并且该 ufunc 在没有缓冲的情况下运行，就可以非常快地完成这种迭代计算。对于你的情况，可以按如下方式一次性处理数组的内部区域：

>>> a = np.arange(25, dtype=np.double).reshape(5, 5)
>>> from numpy.lib.stride_tricks import as_strided
>>> rows, cols = a.shape
>>> a_view = as_strided(a, shape=(rows-3+1, cols-3+1, 3, 3) ,strides=a.strides*2)
>>> alpha = 2
>>> mask = [[0, alpha/4, 0], [alpha/4, 1-alpha, alpha/4], [0, alpha/4, 0]]

如果我们在没有更新的情况下处理数组内部，我们会得到：

>>> np.einsum('ijkl,kl->ij', a_view, mask)
array([[  6.,   7.,   8.],
       [ 11.,  12.,  13.],
       [ 16.,  17.,  18.]])

但是如果我们告诉np.einsum将结果存储在同一个数组中，那么看看会发生什么：

>>> np.einsum('ijkl,kl->ij', a_view, mask, out=a[1:-1, 1:-1])
array([[  3.       ,   2.25     ,   5.625    ],
       [  5.5625   ,   4.1875   ,   9.09375  ],
       [ 19.046875 ,  17.546875 ,  24.2734375]])
>>> a
array([[  0.       ,   1.       ,   2.       ,   3.       ,   4.       ],
       [  5.       ,   3.       ,   2.25     ,   5.625    ,   9.       ],
       [ 10.       ,   5.5625   ,   4.1875   ,   9.09375  ,  14.       ],
       [ 15.       ,  19.046875 ,  17.546875 ,  24.2734375,  19.       ],
       [ 20.       ,  21.       ,  22.       ,  23.       ,  24.       ]])

然后，你需要单独处理边界，但对于大型数组，边界只占总计算量中可以忽略不计的一部分。除非你一次只处理一行（以速度换取一致性），否则无法得到与原始方案完全相同的更新模式，但性能会大幅提升。主要的顾虑是：这依赖于实现细节，无法保证未来的 numpy 版本不会改变 np.einsum 的缓冲行为，从而破坏你的代码。不过，只要你的数组是“行为良好”的（正确对齐且采用本机字节序）——除非你在创建它时做了奇怪的事情，否则通常都是如此——这种做法很有可能奏效。

逐次超松弛（SOR）不收敛（当更新不是就地进行时）

1 个答案: