有没有办法让这个函数运行得更快?

时间:2013-07-05 10:23:37

标签: python-3.x f# benchmarking list-comprehension

我正在比较下面这个 F# 函数的性能:

/// Sums the running totals of the spiral "steps": every even step
/// 2L, 4L, ..., N is emitted four times, the steps are turned into running
/// totals seeded with 1L (Seq.scan), and those totals are added up.
let e28 N =
  let steps =
    seq {
      for step in 2L .. 2L .. N do
        for _ in 1 .. 4 do
          yield step
    }
  steps |> Seq.scan (+) 1L |> Seq.sum

与以下 Python 3.3 的等价实现:

def e28a(N = 100000):
    """Return the sum of all running corner values for steps 2, 4, ..., N.

    Starts from 1, adds each even step to the running corner value four
    times, and accumulates every intermediate corner value into the total.
    """
    corner = 1
    total = corner
    for step in range(2, N + 1, 2):
        # Each step size contributes four consecutive corner values.
        for _ in range(4):
            corner += step
            total += corner
    return total

import itertools as it
def e28b(N = 100000):
    """Same sum as the loop version, built from itertools stages.

    The seed value 1 is chained in front of the steps (each even step
    2, 4, ..., N repeated four times); accumulate() turns that stream into
    running totals, which are then summed.
    """
    steps = (step for step in range(2, N+1, 2) for _ in range(4))
    corners = it.accumulate(it.chain([1], steps))
    return sum(corners)

import numpy as np
def e28c(N = 100000):
    """NumPy variant: cumulative sum of the step stream, then a total sum.

    The stream is the seed value 1 followed by each even step 2, 4, ..., N
    repeated four times. It is materialised as an int64 array, so the
    result is a NumPy int64 scalar.
    """
    steps = (i for i in range(2, N+1, 2) for j in range(4))
    # Fix: the original called bare `chain`, which is never imported (only
    # `import itertools as it` exists), so every call raised NameError.
    values = np.fromiter(it.chain([1], steps), np.int64)
    return np.sum(np.cumsum(values))

在 Windows 7 上,64 位 CPython 3.3.1 的性能比 C++ 慢约 574 倍。以下是 N = 100000 时的耗时:

e28:23ms; e28a:48.4ms; e28b:49.7ms; e28c:40.2ms; C ++版本:0.07ms

在不改变基础算法的前提下,Python 代码是否有容易实现的优化(即所谓“低垂的果实”)?

3 个答案:

答案 0 :(得分:4)

通过改用过程式、可变状态的写法(比如你的 Python 版 e28a),F# 版本可以提速约 10 倍。当“有效载荷操作”(这里只是 +)如此简单时,使用组合子(combinator)带来的开销就会相对显著。顺便一提,Seq.sum 使用带溢出检查的算术运算,这也会增加一些开销。

F# 的一个好处是:当热点路径(hot path)需要时,你可以退回到过程式/可变状态的写法。

/// Combinator-based version over uint64: emits every even step
/// 2UL, 4UL, ..., N four times, converts the steps into running totals
/// seeded with 1UL, and sums those totals.
let e28_original N =
  let steps = seq { for step in 2UL .. 2UL .. N do for _ in 1 .. 4 -> step }
  Seq.sum (Seq.scan (+) 1UL steps)

/// Imperative version over uint64: keeps the current corner value and the
/// grand total in mutable locals instead of building sequences.
let e28_mutable N =
  let mutable corner = 1UL
  let mutable acc = corner
  for step in 2UL .. 2UL .. N do
    // Each step size contributes four consecutive corner values.
    for _ in 1 .. 4 do
      corner <- corner + step
      acc <- acc + corner
  acc

/// Runs `f ()` once untimed (warm-up, so JIT compilation is not measured),
/// then runs it again under a Stopwatch and prints the result together with
/// the elapsed time of that second run.
let time f =
    ignore (f ())
    let stopwatch = System.Diagnostics.Stopwatch()
    stopwatch.Start()
    let result = f ()
    stopwatch.Stop()
    printfn "Result: %A Elapsed: %A" result stopwatch.Elapsed

// Benchmark both variants on the same input so the printed results and
// elapsed times are directly comparable.
let benchmarkInput = 100000UL
time (fun () -> e28_original benchmarkInput)
time (fun () -> e28_mutable benchmarkInput)

结果

Result: 666691667100001UL Elapsed: 00:00:00.0429414
Result: 666691667100001UL Elapsed: 00:00:00.0034971

答案 1 :(得分:3)

使用你的F#版本我得到了:

> e28(100000L);;
Real: 00:00:00.061, CPU: 00:00:00.062, GC gen0: 2, gen1: 0, gen2: 0
val it : int64 = 666691667100001L

使用:

/// Variant that expands each even step 2L, 4L, ..., N into four copies via
/// Seq.collect, then takes running totals seeded with 1L and sums them.
let e28d N =
    // One step value repeated four times, as explicit yields.
    let fourTimes step = seq { yield step; yield step; yield step; yield step }
    let steps = Seq.collect fourTimes (seq { 2L .. 2L .. N })
    Seq.sum (Seq.scan (+) 1L steps)

我得到了:

> e28d(100000L);;
Real: 00:00:00.040, CPU: 00:00:00.031, GC gen0: 2, gen1: 0, gen2: 0
val it : int64 = 666691667100001L

由于 F# 是编译执行的,而 Python 是解释执行的,你可能很难让 Python 的性能与 F# 持平。话虽如此,上述改进思路同样适用于 Python:

>>> def e28a(N = 100000):
    # Baseline version: for each even width 2, 4, ..., N, add the width to
    # diagNumber four times and accumulate every intermediate diagNumber.
    # Note: the local name `sum` shadows the built-in inside this function,
    # and the trailing semicolons are redundant in Python.
    diagNumber = 1;                            
    sum        = diagNumber;                   
    for width in range(2, N+1, 2):
        # The inner loop builds a fresh range(4) on every outer iteration.
        for j in range(4):          
            diagNumber += width;                
            sum        += diagNumber;           
    return sum;

>>> if __name__ == '__main__':
    # Time 10 calls of e28a() (default N); the setup string imports e28a
    # from __main__. timeit.timeit returns the total seconds for all runs.
    import timeit
    print(timeit.timeit("e28a()", setup="from __main__ import e28a", number=10))


0.5249497228663813
>>> def e28a(N = 100000):
    # Unrolled version: the inner `for j in range(4)` loop is replaced by
    # four explicit add/accumulate pairs per width.
    diagNumber = 1;
    sum        = diagNumber;
    for width in range(2, N+1, 2):
        # 1st of four corner values for this width
        diagNumber += width;
        sum        += diagNumber;
        # 2nd
        diagNumber += width;
        sum        += diagNumber;
        # 3rd
        diagNumber += width;
        sum        += diagNumber;
        # 4th
        diagNumber += width;
        sum        += diagNumber;
    return sum;

>>> if __name__ == '__main__':
    # Time 10 calls of e28a() (default N); the setup string imports e28a
    # from __main__. timeit.timeit returns the total seconds for all runs.
    import timeit
    print(timeit.timeit("e28a()", setup="from __main__ import e28a", number=10))


0.2585966329330063
>>> 

部分改进来自较少的函数调用,即:

>>> def e28a(N = 100000):
    # Hoisted-range version: range(4) is created once and reused by the
    # inner loop, instead of calling range(4) on every outer iteration.
    diagNumber = 1;                            
    sum        = diagNumber;
    temp_range = range(4)             #Change here
    for width in range(2, N+1, 2):
        for j in temp_range:          #Change here
            diagNumber += width;                
            sum        += diagNumber;           
    return sum;

>>> if __name__ == '__main__':
    # Time 10 calls of e28a() (default N); the setup string imports e28a
    # from __main__. timeit.timeit returns the total seconds for all runs.
    import timeit
    print(timeit.timeit("e28a()", setup="from __main__ import e28a", number=10))


0.40251470339956086
>>> 

我认为另一部分改进来自去掉内层循环。在 Python 中,函数调用和循环的开销都相当大。

答案 2 :(得分:1)

在我的机器上,这个版本快了将近一倍。它使用了记忆化(memoization),并利用了基本的算术推导。

您必须定义一个全局变量。

# Initial seed for the running total used by e28d. It is only read, never
# mutated, so repeated calls to e28d start from the same state.
summi=2

def e28d(N = 100000):
    """Sum of the spiral corner values for widths 2, 4, ..., N, one ring at a time.

    For each even width the helper returns the sum of that ring's four
    corner values, derived from a running total that grows by 16*width + 4
    per ring. The final `+ 1` adds the starting centre value.

    Fix: the running total used to live in the module-level global `summi`,
    which was mutated and never reset, so only the first call returned the
    right answer. The state is now local to each call.
    """
    running = summi  # per-call copy of the seed; the global stays untouched

    def memo(width):
        # `width` here is already 4 * (ring width), as passed by the caller.
        nonlocal running
        running += width*4+4
        return running-width*2+2

    x = sum(memo(width*4) for width in range(2, N+1, 2)) + 1
    return x

结果:
e28a:

0.0591201782227秒

e28d:

0.0349650382996秒

希望这至少有些建设性。注意:你需要根据该数字是奇数还是偶数做相应调整。

**更新:** 下面这个函数在 Python 中的运行速度快了大约一百倍(N = 100000 时约 0.5 ms),并且完全避免了循环:

import math
def e28e(X = 100000):
    """Closed-form sum of the spiral corner values for widths 2, 4, ..., X.

    Returns the same value as the loop-based versions, using exact integer
    arithmetic and no iteration. Odd X is treated like X - 1, and X < 2
    yields 1 (only the starting centre value), matching what the loops
    produce for an empty range.

    Fixes relative to the original:
    * `keybool/2` is true division in Python 3, so odd remainders (1, 3, 5)
      gave 0.5 / 1.5 / 2.5 and selected the wrong branch; `//` is used now.
    * `math.pow` returns a float, which loses exactness above 2**53; the
      square is computed with integers instead.
    * `sum(range(k))` was a hidden O(k) loop; it is replaced by the
      equivalent k*(k-1)//2.
    """
    if X < 2:
        return 1
    # Write X = 6*keyint + keybool; the correction term below depends on
    # which pair of remainders (0-1, 2-3 or 4-5) X falls into.
    keyint, keybool = divmod(X, 6)
    branch = keybool // 2
    if branch == 0:
        keyvar = 16*keyint + (keyint*(keyint - 1)//2)*12
    elif branch == 1:
        keyvar = 44*keyint + (keyint*(keyint - 1)//2)*36 + 7
    else:
        keyvar = 28*(keyint + 1) + ((keyint + 1)*keyint//2)*60 - 2
    # Round an odd X down to the even width actually reached by the loops.
    X -= keybool % 2
    diag = X*X + 2*X + 1  # (X + 1) ** 2, kept as an exact int
    newvar = keyint + X//2 + 1
    return diag*newvar + keyvar