如何让数值求解代码运行得更快(numpy / scipy)

时间:2014-03-03 15:36:06

标签: python numpy

如果有人能帮我让这段代码运行得更快,我将不胜感激。目前它实在太慢了。这段代码基本上是先生成两个占空比不同的方波,然后对生成的方波应用一种特殊的滤波器(Goertzel 算法),从中提取特定的频率分量,并尝试通过改变这两个方波的占空比,使该频率分量与目标值相匹配。

import os
import numpy as np
from scipy import optimize, integrate, signal
import math
import cmath

PI = np.pi


def Goetrzel(x, target_frequency, sample_rate):
    """Estimate one frequency component of ``x`` with the Goertzel recurrence.

    Parameters
    ----------
    x : sequence of numbers
        One-dimensional sample sequence; must contain at least one sample.
    target_frequency : number
        Frequency of interest, in the same unit as ``sample_rate``.
    sample_rate : number
        Sampling frequency of ``x``.

    Returns
    -------
    tuple
        ``(magnitude, phase)`` of the component, where the complex result has
        been divided by ``len(x) / 2`` and the phase is given in degrees.

    Raises
    ------
    ValueError
        If ``x`` is empty (the normalisation would divide by zero).
    """
    samples = np.asarray(x)
    n_samples = len(samples)
    if n_samples == 0:
        raise ValueError("x must contain at least one sample")

    # float() prevents floor division when both arguments are integers
    # (Python 2 semantics of ``/``).
    normalized_frequency = float(target_frequency) / sample_rate

    wr = np.cos(2.0 * np.pi * normalized_frequency)
    wi = np.sin(2.0 * np.pi * normalized_frequency)
    coeff = 2.0 * wr

    # The recurrence s[n] = x[n] + coeff*s[n-1] - s[n-2] is an IIR filter
    # with b = [1] and a = [1, -coeff, 1]; lfilter evaluates it in compiled
    # code instead of a per-sample Python loop.
    state = signal.lfilter([1.0], [1.0, -coeff, 1.0], samples)
    s_prev = state[-1]
    # With a single sample the state before it is the zero initial condition.
    s_prev2 = state[-2] if n_samples > 1 else 0.0

    XKreal = s_prev * wr - s_prev2
    XKimag = s_prev * wi

    XK = (XKreal + 1j*XKimag) / (n_samples/2.)

    return abs(XK), np.angle(XK)*180./np.pi


def equations(p, zcurr, z, k1, k2):
    """Objective for the duty-cycle search at a single position ``zcurr``.

    The product of two square waves (arguments ``k1*t`` and ``k2*t``, duty
    cycles ``D1`` and ``D2``) is sampled on ``t = 0, 0.01, ..., <10``.  The
    Goertzel magnitudes of that product at ``k1/(2*pi)`` and ``k2/(2*pi)``
    are compared with ``1 - cos(pi/2 * zcurr/L)`` and
    ``1 - sin(pi/2 * zcurr/L)`` respectively.

    Parameters
    ----------
    p : sequence of two numbers
        Candidate duty cycles ``(D1, D2)``.
    zcurr : number
        Position at which the target magnitudes are evaluated.
    z : unused
        Kept so the signature matches the ``args`` tuple passed by the caller.
    k1, k2 : number
        Multipliers applied to ``t`` inside the two square waves.

    Returns
    -------
    float
        Sum of the two squared mismatches.

    Notes
    -----
    Reads the module-level name ``L``.
    """
    D1, D2 = p

    h = 0.01
    t = np.arange(0., 10., h)
    # Build the sampled product once; both residuals analyse the same signal.
    product = signal.square(k1*t, duty=D1) * signal.square(k2*t, duty=D2)

    target_k1 = -np.cos(np.pi/2.*zcurr/L)+1.
    target_k2 = -np.sin(np.pi/2.*zcurr/L)+1.

    eq1 = Goetrzel(product, k1/(2.*np.pi), 1./h)[0] - target_k1
    eq2 = Goetrzel(product, k2/(2.*np.pi), 1./h)[0] - target_k2

    return eq1**2 + eq2**2


def DutyCycleSolver(z, k1, k2, display=False):
    """Find, for every entry of ``z``, the duty-cycle pair minimising ``equations``.

    For each position a small grid of starting guesses is handed to
    ``scipy.optimize.fmin``.  Results whose duty cycles fall outside
    ``[0, 1]`` (with a 1e-8 slack) are discarded.  The search for a position
    stops at the first result whose objective value is ``<= 1e-3``;
    otherwise the best in-range result seen is kept.

    Parameters
    ----------
    z : sequence of numbers
        Positions to solve for.
    k1, k2 : number
        Forwarded unchanged to ``equations``.
    display : bool, optional
        When true, print a short report for every position.

    Returns
    -------
    tuple of ndarray
        ``(D1, D2, Derr)``, each of length ``len(z)``.  ``D1``/``D2`` hold
        the selected duty cycles (NaN where every result was out of range)
        and ``Derr`` the matching objective value (inf in that case).
    """
    tolerance = 1.e-3
    slack = 1.e-8
    n_points = len(z)

    D1 = np.full(n_points, np.nan)
    D2 = np.full(n_points, np.nan)
    Derr = np.full(n_points, np.inf)
    # NaN-filled so that a position with no accepted guess reports NaN
    # rather than uninitialised memory.
    D1_D2_guess = np.full((n_points, 2), np.nan)

    # Same guess order as two nested loops: guessD1 outer, guessD2 inner.
    guesses = np.arange(0.8, 1., 0.1)
    guess_pairs = [(g1, g2) for g1 in guesses for g2 in guesses]

    for i, zcurr in enumerate(z):
        solutionFound = False
        for guessD1, guessD2 in guess_pairs:
            temp = optimize.fmin(equations,
                                 x0=(guessD1, guessD2),
                                 args=(zcurr, z, k1, k2),
                                 xtol=1e-6,
                                 ftol=1e-6,
                                 disp=False,
                                 full_output=True)
            duties, DerrCur = temp[0], temp[1]

            # Reject results outside the valid duty-cycle range.
            if np.any(duties < -slack) or np.any(duties > 1. + slack):
                continue

            # Derr[i] is always above the tolerance here (otherwise the
            # search would already have stopped), so one comparison covers
            # both "good enough" and "best so far".
            if DerrCur < Derr[i]:
                D1[i], D2[i] = duties
                Derr[i] = DerrCur
                D1_D2_guess[i] = [guessD1, guessD2]

            if DerrCur <= tolerance:
                solutionFound = True
                break

        if display:
            # Single-argument print(...) yields the same text on Python 2
            # and Python 3.
            if solutionFound:
                print('Solution found at %s' % (zcurr,))
                print('Using: %s %s' % (D1[i], D2[i]))
                print('Found with guess: %s' % (D1_D2_guess[i],))
            else:
                print('No solution found at %s' % (zcurr,))
                print('Using: %s %s' % (D1[i], D2[i]))
                print('With guess: %s' % (D1_D2_guess[i],))
            print('Error: %s' % (Derr[i],))
            print('')

    return D1, D2, Derr


# Grid of positions handed to the solver: 0 <= z < L in steps of h.
# ``L`` has to remain a module-level name because ``equations`` reads it
# as a global.
h = 0.3
L = 2.e3
z = np.arange(0., L, h)

# Only launch the (slow) solver when executed as a script, so importing this
# module has no side effects beyond defining the names above.
if __name__ == "__main__":
    DutyCycleSolver(z, 3., 8., display=True)

1 个答案:

答案 0 :(得分:0)

这只能让代码快一点点(0.000001%?),所以基本上没什么用。

之前:

# Excerpt (original version) from the guess loop of DutyCycleSolver:
# decide what to do with the result of one optimize.fmin run.
DerrCur = temp[1]
if DerrCur <= 1.e-3:
    # Objective value within the 1e-3 threshold: store it and stop trying
    # further guesses.
    D1[i], D2[i] = temp[0]
    Derr[i] = temp[1]
    D1_D2_guess[i] = [guessD1, guessD2]
    solutionFound = True
    break
# The `DerrCur > 1.e-3` test is redundant: the `if` above already handled
# every value <= 1.e-3.
elif DerrCur > 1.e-3 and DerrCur < Derr[i]:
    # Above the threshold but better than the best so far: remember it and
    # keep searching.
    D1[i], D2[i] = temp[0]
    Derr[i] = temp[1]
    D1_D2_guess[i] = [guessD1, guessD2]

后:

# Excerpt (revised version) from the guess loop of DutyCycleSolver:
# same logic as before, with the redundant threshold comparison removed.
DerrCur = temp[1]
if DerrCur <= 1.e-3:
    # Objective value within the 1e-3 threshold: store it and stop trying
    # further guesses.
    D1[i], D2[i] = temp[0]
    Derr[i] = temp[1]
    D1_D2_guess[i] = [guessD1, guessD2]
    solutionFound = True
    break
# Reaching this branch already implies DerrCur > 1.e-3.
elif DerrCur < Derr[i]:
    # Better than the best so far: remember it and keep searching.
    D1[i], D2[i] = temp[0]
    Derr[i] = temp[1]
    D1_D2_guess[i] = [guessD1, guessD2]

显然,`temp = optimize.fmin(equations, ...)` 这一步才是瓶颈。其中的矢量运算大约要执行 20 * 20 * 1000 = 400k 次。这意味着,如果每次矢量计算耗时约 1 毫秒,整个过程就需要约 400 秒才能完成。