python到C性能问题

时间:2014-12-09 15:49:24

标签: python performance ctype

我有这个python代码

def transferIntListToIntArray(sourceTuple):
    """Build a new ctypes ``c_int`` array holding the items of ``sourceTuple``.

    Args:
        sourceTuple: a sized sequence of ints.

    Returns:
        A ``c_int`` array with one element per input item, or None when the
        sequence is empty.
    """
    count = len(sourceTuple)
    if count <= 0:
        return None

    # Create the fixed-size array type first, then fill it from the items.
    array_type = c_int * count
    return array_type(*sourceTuple)

def transformIntTupleToIntTab(Input):
    """Convert an int or a sequence of ints into a ctypes ``c_int`` array.

    Args:
        Input: None, a single int, or a sequence of ints.

    Returns:
        Whatever ``transferIntListToIntArray`` returns for the sequence (a
        lone int is first wrapped in a 1-tuple), or None when ``Input`` is
        None.
    """
    if Input is None:
        return None

    # isinstance() also accepts int subclasses, which the previous exact
    # type comparison let through to len() where they raised TypeError.
    if isinstance(Input, int):
        Input = (Input,)
    return transferIntListToIntArray(Input)

def myfunc(input):
    """Convert ``input`` to a ``c_int`` array and pass it to ``mylib.myfuncC``.

    Args:
        input: None, a single int, or a sequence of ints.

    Returns:
        The value returned by ``mylib.myfuncC``.
    """
    inputTab = transformIntTupleToIntTab(input)

    # ctypes reads the plural ``argtypes``; the misspelled ``argtype`` only
    # set an attribute that ctypes never consults. When the conversion gives
    # None there is no array type to declare, so the declaration is cleared.
    if inputTab is None:
        mylib.myfuncC.argtypes = None
    else:
        mylib.myfuncC.argtypes = [type(inputTab)]
    return mylib.myfuncC(inputTab)

我有一个 Python 文件,其中对 myfunc 函数有数千次调用(例如 myfunc((0,1,2,3)))。对这段 Python 代码计时后发现,下面这一行的开销很大:

targetArray = (c_int * myArrayLen) (*sourceTuple)

调用此函数 10000 次需要 0.1 - 0.2 秒(Python 2.5.1)。这里只是一个基本的例子,我的实际代码中有多处需要把 Python 元组转换为 C 中的 int * 或 double *。我想知道如何编写更高效的 Python 代码。

示例:

import time
from ctypes import *
from cmath import *

def transferIntListToIntArray(sourceTuple):
    """Build a new ctypes ``c_int`` array holding the items of ``sourceTuple``.

    Args:
        sourceTuple: a sized sequence of ints.

    Returns:
        A ``c_int`` array with one element per input item, or None when the
        sequence is empty.
    """
    count = len(sourceTuple)
    if count <= 0:
        return None

    # Create the fixed-size array type first, then fill it from the items.
    array_type = c_int * count
    return array_type(*sourceTuple)


def transformIntTupleToIntTab(Input):
    """Convert an int or a sequence of ints into a ctypes ``c_int`` array.

    Args:
        Input: None, a single int, or a sequence of ints.

    Returns:
        Whatever ``transferIntListToIntArray`` returns for the sequence (a
        lone int is first wrapped in a 1-tuple), or None when ``Input`` is
        None.
    """
    if Input is None:
        return None

    # isinstance() also accepts int subclasses, which the previous exact
    # type comparison let through to len() where they raised TypeError.
    if isinstance(Input, int):
        Input = (Input,)
    return transferIntListToIntArray(Input)

def myfunc(iCurve):
    """Run the tuple-to-array conversion on ``iCurve`` and return 0.

    The converted array is discarded; only the conversion work is performed,
    which is what the surrounding benchmark measures.
    """
    transformIntTupleToIntTab(iCurve)
    return 0

# Sample input: one tuple of small ints, reused for every call below.
test = (0,1,2,3,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,55,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5)

# Clock reading taken just before the loop.
start = time.clock()

# Call myfunc 100000 times with the same tuple, then print the elapsed
# time.clock() difference.
for i in range(100000):
   myfunc(test)
print "done, elapsed wall clock time (win32) in seconds: " , time.clock() - start

完成,经过的挂钟时间(win32),以秒为单位:0.497456968189

同样的示例在 Python 2.7 中耗时 1.65 秒(很奇怪)

Python报告:

    done, elapsed wall clock time (win32) in seconds:  0.582374385947
         400091 function calls in 0.590 CPU seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.590    0.590 <string>:1(<module>)
        1    0.001    0.001    0.590    0.590 {execfile}
        1    0.057    0.057    0.589    0.589 tupletest.py:1(<module>)
   100000    0.035    0.000    0.526    0.000 tupletest.py:22(myfunc)
   100000    0.068    0.000    0.491    0.000 tupletest.py:14(transformIntTupleToIntTab)
   100000    0.417    0.000    0.423    0.000 tupletest.py:5(transferIntListToIntArray)
   100003    0.006    0.000    0.006    0.000 {len}
        1    0.004    0.004    0.005    0.005 __init__.py:4(<module>)
        1    0.002    0.002    0.002    0.002 {range}
        1    0.000    0.000    0.000    0.000 _endian.py:4(<module>)
        4    0.000    0.000    0.000    0.000 __init__.py:83(CFUNCTYPE)
        2    0.000    0.000    0.000    0.000 __init__.py:211(POINTER)
        1    0.000    0.000    0.000    0.000 __init__.py:291(CDLL)
        1    0.000    0.000    0.000    0.000 __init__.py:335(PyDLL)
        1    0.000    0.000    0.000    0.000 __init__.py:441(PYFUNCTYPE)
        1    0.000    0.000    0.000    0.000 __init__.py:346(WinDLL)
        4    0.000    0.000    0.000    0.000 struct.py:43(calcsize)
        1    0.000    0.000    0.000    0.000 __init__.py:322(__getattr__)
        1    0.000    0.000    0.000    0.000 __init__.py:370(OleDLL)
        1    0.000    0.000    0.000    0.000 __init__.py:384(__getattr__)
        1    0.000    0.000    0.000    0.000 __init__.py:329(__getitem__)
        2    0.000    0.000    0.000    0.000 __init__.py:309(__init__)
        1    0.000    0.000    0.000    0.000 {_ctypes.LoadLibrary}
        3    0.000    0.000    0.000    0.000 struct.py:35(_compile)
        1    0.000    0.000    0.000    0.000 {_ctypes.set_conversion_mode}
        4    0.000    0.000    0.000    0.000 __init__.py:381(__init__)
        2    0.000    0.000    0.000    0.000 {time.clock}
       18    0.000    0.000    0.000    0.000 {_ctypes.sizeof}
        3    0.000    0.000    0.000    0.000 __init__.py:101(CFunctionType)
        1    0.000    0.000    0.000    0.000 {isinstance}
        1    0.000    0.000    0.000    0.000 __init__.py:442(CFunctionType)
        1    0.000    0.000    0.000    0.000 __init__.py:144(c_short)
        1    0.000    0.000    0.000    0.000 __init__.py:380(LibraryLoader)
        2    0.000    0.000    0.000    0.000 {setattr}
        1    0.000    0.000    0.000    0.000 _endian.py:22(_swapped_meta)
        1    0.000    0.000    0.000    0.000 __init__.py:340(_FuncPtr)
        1    0.000    0.000    0.000    0.000 __init__.py:136(py_object)
        1    0.000    0.000    0.000    0.000 {method 'startswith' of 'str' objects}
        1    0.000    0.000    0.000    0.000 _endian.py:45(BigEndianStructure)
        1    0.000    0.000    0.000    0.000 __init__.py:181(c_ulonglong)
        1    0.000    0.000    0.000    0.000 __init__.py:167(c_float)
        1    0.000    0.000    0.000    0.000 __init__.py:147(c_ushort)
        1    0.000    0.000    0.000    0.000 __init__.py:178(c_longlong)
        1    0.000    0.000    0.000    0.000 __init__.py:197(c_char)
        1    0.000    0.000    0.000    0.000 __init__.py:305(_FuncPtr)
        1    0.000    0.000    0.000    0.000 __init__.py:376(_FuncPtr)
        1    0.000    0.000    0.000    0.000 __init__.py:153(c_ulong)
        1    0.000    0.000    0.000    0.000 __init__.py:243(c_wchar)
        1    0.000    0.000    0.000    0.000 __init__.py:204(c_void_p)
        1    0.000    0.000    0.000    0.000 __init__.py:187(c_ubyte)
        1    0.000    0.000    0.000    0.000 __init__.py:201(c_char_p)
        1    0.000    0.000    0.000    0.000 __init__.py:240(c_wchar_p)
        1    0.000    0.000    0.000    0.000 __init__.py:150(c_long)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.000    0.000    0.000    0.000 __init__.py:193(c_byte)
        1    0.000    0.000    0.000    0.000 __init__.py:170(c_double)
        1    0.000    0.000    0.000    0.000 __init__.py:357(HRESULT)
        1    0.000    0.000    0.000    0.000 __init__.py:350(_FuncPtr)

使用两个数组(tab)的示例:目的是把 iCurve1 和 iCurve2 转换为 int *,以便调用 C 函数 mylibC.myfuncC(iCurveTab1, iCurveTab2);对应的 C 函数为 myfuncC(int *iCurveTab1, int *iCurveTab2)

import time
from ctypes import *
from cmath import *

def transferIntListToIntArray(sourceTuple):
    """Build a new ctypes ``c_int`` array holding the items of ``sourceTuple``.

    Args:
        sourceTuple: a sized sequence of ints.

    Returns:
        A ``c_int`` array with one element per input item, or None when the
        sequence is empty.
    """
    count = len(sourceTuple)
    if count <= 0:
        return None

    # Create the fixed-size array type first, then fill it from the items.
    array_type = c_int * count
    return array_type(*sourceTuple)


def transformIntTupleToIntTab(Input):
    """Convert an int or a sequence of ints into a ctypes ``c_int`` array.

    Args:
        Input: None, a single int, or a sequence of ints.

    Returns:
        Whatever ``transferIntListToIntArray`` returns for the sequence (a
        lone int is first wrapped in a 1-tuple), or None when ``Input`` is
        None.
    """
    if Input is None:
        return None

    # isinstance() also accepts int subclasses, which the previous exact
    # type comparison let through to len() where they raised TypeError.
    if isinstance(Input, int):
        Input = (Input,)
    return transferIntListToIntArray(Input)

def myfunc(iCurve1, iCurve2):
    """Run the tuple-to-array conversion on both curves and return 0.

    Each argument is converted independently, first ``iCurve1`` and then
    ``iCurve2``; the resulting arrays are discarded.
    """
    transformIntTupleToIntTab(iCurve1)
    transformIntTupleToIntTab(iCurve2)
    return 0

# Sample input: one tuple of small ints, passed as both arguments below.
test = (0,1,2,3,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,55,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5)

# Clock reading taken just before the loop.
start = time.clock()

# Call the two-argument myfunc 100000 times, then print the elapsed
# time.clock() difference.
for i in range(100000):
   myfunc(test, test)
print "done, elapsed wall clock time (win32) in seconds: " , time.clock() - start

在Python 2.5中: 完成,经过的挂钟时间(win32),以秒为单位:0.96631573455

在Python 2.7中: 完成,经过的挂钟时间(win32),以秒为单位:3.25996918937

  1. 我如何改进这段 Python 代码?(deets 之前提出的那个不错的想法行不通,因为 iCurveTab1 和 iCurveTab2 会使用同一个 C 指针。)
  2. 从这段代码来看,Python 2.7 似乎存在性能回退。

    我的真实代码(带 C 接口)。Python 代码 mytest.py:

     import time
        from ctypes import *
        from cmath import *
    
        def transferIntListToIntArray(sourceTuple):
            """Build a new ctypes ``c_int`` array from the items of ``sourceTuple``.

            Args:
                sourceTuple: a sized sequence of ints.

            Returns:
                A ``c_int`` array with one element per input item, or None
                when the sequence is empty.
            """
            count = len(sourceTuple)
            if count <= 0:
                return None

            # Create the fixed-size array type first, then fill it.
            array_type = c_int * count
            return array_type(*sourceTuple)
    
    
        def transformIntTupleToIntTab(Input):
            """Convert an int or a sequence of ints into a ``c_int`` array.

            Args:
                Input: None, a single int, or a sequence of ints.

            Returns:
                Whatever ``transferIntListToIntArray`` returns for the
                sequence (a lone int is first wrapped in a 1-tuple), or None
                when ``Input`` is None.
            """
            if Input is None:
                return None

            # isinstance() also accepts int subclasses, which the previous
            # exact type comparison let through to len() where they raised
            # TypeError.
            if isinstance(Input, int):
                Input = (Input,)
            return transferIntListToIntArray(Input)
    
        def myfunc(iCurve1, iCurve2):
            """Convert both curves to ``c_int`` arrays and call ``mylibC.myfuncC``.

            Args:
                iCurve1: None, a single int, or a sequence of ints.
                iCurve2: None, a single int, or a sequence of ints.

            Returns:
                The value returned by ``mylibC.myfuncC``, which is called as
                ``(length1, length2, array1, array2)``.
            """
            iCurveTab1 = transformIntTupleToIntTab(iCurve1)
            iCurveTab2 = transformIntTupleToIntTab(iCurve2)

            # The conversion yields None for None or empty input; len(None)
            # raises TypeError, so such a curve is reported with length 0.
            ilen1 = len(iCurveTab1) if iCurveTab1 is not None else 0
            ilen2 = len(iCurveTab2) if iCurveTab2 is not None else 0
            return mylibC.myfuncC(ilen1, ilen2, iCurveTab1, iCurveTab2)
    

    C代码

    /* C entry point called from the Python wrapper with two lengths followed
       by two int buffers. The body shown here is an empty stub. */
    void myfuncC(int ilen1, int ilen2, int *piCurve1, int *piCurve2)
    {
    
      return;
    }
    

    如果 iCurveTab1 和 iCurveTab2 共享同一个缓存(即 piCurve1 == piCurve2),就会出问题,因为其中的值会被抹掉

    我执行的python:

    # Star-import the wrappers from mytest.py (the ``import`` keyword was
    # missing), then call myfunc with two tuples of different lengths.
    from mytest import *
    myfunc((1,2,3,4),(7,8,9,10,11))
    

    谢谢

1 个答案:

答案 0 :(得分:1)

不重新创建数组似乎为我节省了相当多的时间。

 import time
 from ctypes import *
 from cmath import *


 # Module-level caches. myfunc passes iCurve1Cache for its first argument and
 # iCurve2Cache for its second, so the two arguments use separate dicts.
 iCurve1Cache = {}
 iCurve2Cache = {}

 def transferIntListToIntArray(sourceTuple, array_type_cache):
     """Copy ``sourceTuple`` into a reusable ``c_int`` array from the cache.

     Args:
         sourceTuple: a sized sequence of ints.
         array_type_cache: dict mapping a length to the ``c_int`` array
             instance previously created for that length; a missing entry
             is added on first use.

     Returns:
         The cached array for ``len(sourceTuple)``, overwritten with the new
         items, or None when the sequence is empty. Calls with the same
         length and the same cache return the same array object.
     """
     count = len(sourceTuple)
     if count <= 0:
         return None

     try:
         buf = array_type_cache[count]
     except KeyError:
         # First request for this length: allocate the array and keep it.
         buf = array_type_cache[count] = (c_int * count)()

     # Overwrite the whole array in place with the new values.
     buf[:] = sourceTuple
     return buf


 def transformIntTupleToIntTab(Input, array_type_cache=None):
     """Convert an int or a sequence of ints into a ``c_int`` array.

     Args:
         Input: None, a single int, or a sequence of ints.
         array_type_cache: dict forwarded to ``transferIntListToIntArray``.
             It was previously missing from the signature although myfunc
             passes it and the helper requires it. When omitted, an empty
             dict is used for this call only.

     Returns:
         Whatever ``transferIntListToIntArray`` returns for the sequence (a
         lone int is first wrapped in a 1-tuple), or None when ``Input`` is
         None.
     """
     if Input is None:
         return None
     if isinstance(Input, int):
         Input = (Input,)
     if array_type_cache is None:
         # No cache supplied: an empty dict means nothing is reused.
         array_type_cache = {}
     return transferIntListToIntArray(Input, array_type_cache)

 def myfunc(iCurve1, iCurve2):
     """Convert both curves, each with its own module-level cache; return 0.

     ``iCurve1`` is converted with ``iCurve1Cache`` and ``iCurve2`` with
     ``iCurve2Cache``. The converted arrays are discarded.
     """
     transformIntTupleToIntTab(iCurve1, iCurve1Cache)
     transformIntTupleToIntTab(iCurve2, iCurve2Cache)
     return 0

 test = (0,1,2,3,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,55,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5)

 start = time.clock()

 for i in range(100000):
    myfunc(test)
 print "done, elapsed wall clock time (win32) in seconds: " , time.clock() - start

在我的Mac上,这会将时间从1.267731秒降低到0.36秒。

如果您想进行更多优化,我们需要更多信息。