我做了这个基准测试:
import logging
import copy
import pstats
import cProfile
from io import StringIO
def difference(first_stats, second_stats):
    """Return a new stats object representing first_stats minus second_stats.

    The input objects are pstats.Stats-like: they expose ``total_calls``,
    ``prim_calls``, ``total_tt`` and a ``stats`` dict mapping
    ``(filename, lineno, funcname)`` tuples to
    ``(cc, nc, tt, ct, callers)`` tuples.

    Neither argument is mutated; the result is built on a deep copy of
    ``first_stats``.  Functions present only in ``second_stats`` are copied
    into the result with their filename prefixed by ``'* '`` so they are
    visually distinguishable in the printed report.
    """
    first_stats = copy.deepcopy(first_stats)
    first_stats.total_calls -= second_stats.total_calls
    first_stats.prim_calls -= second_stats.prim_calls
    first_stats.total_tt -= second_stats.total_tt

    shared_funcs = set(first_stats.stats) & set(second_stats.stats)
    for func in shared_funcs:
        # BUG FIX: the original code unpacked both tuples into the same
        # `callers` name, so the entry silently kept second_stats' callers.
        # Keep the callers of first_stats, whose copy we are building on.
        first_cc, first_nc, first_tt, first_ct, first_callers = first_stats.stats[func]
        second_cc, second_nc, second_tt, second_ct, _second_callers = second_stats.stats[func]
        first_stats.stats[func] = (
            first_cc - second_cc,
            first_nc - second_nc,
            first_tt - second_tt,
            first_ct - second_ct,
            first_callers,
        )

    # Entries that exist only in second_stats: copy them over, marked with '* '.
    for func, source in second_stats.stats.items():
        if func not in first_stats.stats:
            marked_func = ('* ' + func[0],) + tuple(func[1:])
            first_stats.stats[marked_func] = source
    return first_stats
def print_difference(stats):
    """Render `stats`, sorted by internal time, into a string and return it.

    Note: `stats.stream` is redirected to an in-memory buffer as a side effect.
    """
    buffer = StringIO()
    stats.stream = buffer
    stats.sort_stats("time")
    stats.print_stats()
    return buffer.getvalue()
def average(stats, average_count):
    """Divide every counter and timing held by `stats` by `average_count`.

    Mutates `stats` in place and returns the same object for convenience.
    """
    stats.total_calls /= average_count
    stats.prim_calls /= average_count
    stats.total_tt /= average_count
    for func in list(stats.stats):
        cc, nc, tt, ct, callers = stats.stats[func]
        stats.stats[func] = (cc / average_count, nc / average_count,
                             tt / average_count, ct / average_count, callers)
    return stats
def profile_something(profile_function, average_count, iterations_count):
    """Profile `profile_function(iterations_count)` `average_count` times.

    Accumulates the runs into one pstats.Stats, averages it, and returns a
    ``(report_text, stats)`` tuple.  Progress is printed to stdout after
    every run.
    """
    buffer = StringIO()
    aggregate = pstats.Stats(stream=buffer)
    for run in range(average_count):
        run_profiler = cProfile.Profile()
        run_profiler.enable()
        profile_function(iterations_count)
        run_profiler.disable()
        aggregate.add(run_profiler)
        print('Profiled', '{0:7.3f}'.format(aggregate.total_tt), 'seconds at', '%3d' % (run + 1),
              'for', profile_function.__name__, flush=True)
    average(aggregate, average_count)
    aggregate.sort_stats("time")
    aggregate.print_stats()
    return "\n Profile results for %s\n%s" % (profile_function.__name__, buffer.getvalue()), aggregate
def run_profiling(first_function, second_function, average_count, iterations_count):
    """Profile both functions and diff their results.

    Returns ``(summary, report_text)`` where ``summary`` is a tuple of
    (time delta, first total, second total, first name, second name,
    formatted iteration count) ready for ``%``-formatting.
    """
    first_report, first_stats = profile_something(first_function, average_count, iterations_count)
    second_report, second_stats = profile_something(second_function, average_count, iterations_count)

    diff_report = print_difference(difference(first_stats, second_stats))
    elapsed_delta = first_stats.total_tt - second_stats.total_tt

    # Truncate each section so very long listings do not flood the output.
    limit = 2500
    report = StringIO()
    print(first_report[0:limit], file=report)
    print('\n', second_report[0:limit], file=report)
    print('\n\nTotal difference\n', diff_report[0:limit], file=report)

    summary = (elapsed_delta, first_stats.total_tt, second_stats.total_tt,
               first_function.__name__, second_function.__name__,
               format(iterations_count, ',d'))
    return summary, report.getvalue()
def get_loggging_debug():
    """Return a freshly registered 'benchmark' logger with a stream handler.

    The global logger registry is cleared first so every call starts from a
    clean slate (deliberate for benchmarking; do not do this in real code).
    """
    logging.Logger.manager.loggerDict.clear()
    log = logging.getLogger("benchmark")
    if log.hasHandlers():
        return log
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(
        "%(asctime)s:%(msecs)010.6f - %(name)s.%(funcName)s:%(lineno)d - %(message)s",
        "%H:%M:%S"))
    log.addHandler(handler)
    return log
def logging_mod_log_debug_off(iterations_count):
    """Benchmark: issue debug records on a logger whose level suppresses them."""
    log = get_loggging_debug()
    log.setLevel("WARNING")  # DEBUG is below WARNING, so every call is dropped
    for _ in range(iterations_count):
        log.debug('Message')
def standardlog_fastdebug_off(iterations_count):
    """Benchmark: guard a print call behind a plain instance-attribute flag."""
    class _Flag(object):
        def __init__(self, is_debug):
            self.is_debug = is_debug

    flag = _Flag(False)
    for _ in range(iterations_count):
        if flag.is_debug:
            print('Message')
# Run the two benchmark variants once each over ten million iterations,
# then print the full reports followed by a one-line-per-run summary.
results = [run_profiling(standardlog_fastdebug_off, logging_mod_log_debug_off, 1, 10000000)]
print('\n\nResults details:')
for number, entry in enumerate(results, start=1):
    print("\n%2d. %s\n" % (number, entry[1]))
print('Results resume:')
for number, entry in enumerate(results, start=1):
    print('%2d. %10.5f %10.5f %10.5f %s - %s = %s iterations' % ((number,) + entry[0]))
它在我的计算机上产生了以下结果:
$ python3.7.3 benchmark_tests.py
Profiled 0.910 seconds at 1 for standardlog_fastdebug_off
Profiled 10.450 seconds at 1 for logging_mod_log_debug_off
Results details:
1.
Profile results for standardlog_fastdebug_off
5.0 function calls in 0.910 seconds
Ordered by: internal time
ncalls tottime percall cumtime percall filename:lineno(function)
1.0 0.910 0.910 0.910 0.910 benchmark_tests.py:150(standardlog_fastdebug_off)
1.0 0.000 0.000 0.000 0.000 {built-in method builtins.__build_class__}
1.0 0.000 0.000 0.000 0.000 benchmark_tests.py:152(ClassName)
1.0 0.000 0.000 0.000 0.000 benchmark_tests.py:154(__init__)
1.0 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
Profile results for logging_mod_log_debug_off
20,000,058.0 function calls in 10.450 seconds
Ordered by: internal time
ncalls tottime percall cumtime percall filename:lineno(function)
1.0 4.157 4.157 10.450 10.450 benchmark_tests.py:143(logging_mod_log_debug_off)
10,000,000.0 3.968 0.000 6.293 0.000 F:\Python\lib\logging\__init__.py:1362(debug)
10,000,000.0 2.325 0.000 2.325 0.000 F:\Python\lib\logging\__init__.py:1620(isEnabledFor)
1.0 0.000 0.000 0.000 0.000 benchmark_tests.py:129(get_loggging_debug)
1.0 0.000 0.000 0.000 0.000 F:\Python\lib\logging\__init__.py:1222(getLogger)
1.0 0.000 0.000 0.000 0.000 F:\Python\lib\logging\__init__.py:1342(__init__)
1.0 0.000 0.000 0.000 0.000 F:\Python\lib\logging\__init__.py:1310(_clear_cache)
1.0 0.000 0.000 0.000 0.000 F:\Python\lib\threading.py:75(RLock)
5.0 0.000 0.000 0.000 0.000 F:\Python\lib\logging\__init__.py:212(_acquireLock)
3.0 0.000 0.000 0.000 0.000 {method 'clear' of 'dict' objects}
1.0 0.000 0.000 0.000 0.000 F:\Python\lib\logging\__init__.py:794(_addHandlerRef)
1.0 0.000 0.000 0.000 0.000 F:\Python\lib\logging\__init__.py:813(__init__)
5.0 0.000 0.000 0.000 0.000 F:\Python\lib\logging\__init__.py:221(_releaseLock)
1.0 0.000 0.000 0.000 0.000 F:\Python\lib\logging\__init__.py:1355(setLevel)
1.0 0.000 0.000 0.000 0.000 F:\Python\lib\logging\__init__.py:513(__init__)
3.0 0.000 0.000 0.000 0.000 F:\Python\lib\logging\__init__.py:187(_checkLevel)
1.0 0.000 0.000 0.000 0.000 F:\Python\lib\logging\__init__.py:842(createLock)
1.0 0.000 0.000 0.000 0.000 F:\Python\lib\logging\__init__.py:1273(_fixupParents)
5.0 0.000 0.000 0.000 0.000 {method 'acquire' of '_thread.RLock' objects}
2.0 0.000 0.000 0.000 0.000 F:\Python\lib\logging\__init__.py:727(__init__)
1.0 0.000 0.000 0.000 0.000 F:\Python\lib\logging\__init__
Total difference
-20,000,053.0 function calls in -9.540 seconds
Ordered by: internal time
ncalls tottime percall cumtime percall filename:lineno(function)
1.0 4.157 4.157 10.450 10.450 * benchmark_tests.py:143(logging_mod_log_debug_off)
10,000,000.0 3.968 0.000 6.293 0.000 * F:\Python\lib\logging\__init__.py:1362(debug)
10,000,000.0 2.325 0.000 2.325 0.000 * F:\Python\lib\logging\__init__.py:1620(isEnabledFor)
1.0 0.910 0.910 0.910 0.910 benchmark_tests.py:150(standardlog_fastdebug_off)
1.0 0.000 0.000 0.000 0.000 * benchmark_tests.py:129(get_loggging_debug)
1.0 0.000 0.000 0.000 0.000 {built-in method builtins.__build_class__}
1.0 0.000 0.000 0.000 0.000 * F:\Python\lib\logging\__init__.py:1222(getLogger)
1.0 0.000 0.000 0.000 0.000 * F:\Python\lib\logging\__init__.py:1342(__init__)
1.0 0.000 0.000 0.000 0.000 * F:\Python\lib\logging\__init__.py:1310(_clear_cache)
1.0 0.000 0.000 0.000 0.000 * F:\Python\lib\threading.py:75(RLock)
5.0 0.000 0.000 0.000 0.000 * F:\Python\lib\logging\__init__.py:212(_acquireLock)
3.0 0.000 0.000 0.000 0.000 * ~:0(<method 'clear' of 'dict' objects>)
1.0 0.000 0.000 0.000 0.000 * F:\Python\lib\logging\__init__.py:794(_addHandlerRef)
1.0 0.000 0.000 0.000 0.000 * F:\Python\lib\logging\__init__.py:813(__init__)
5.0 0.000 0.000 0.000 0.000 * F:\Python\lib\logging\__init__.py:221(_releaseLock)
1.0 0.000 0.000 0.000 0.000 * F:\Python\lib\logging\__init__.py:1355(setLevel)
1.0 0.000 0.000 0.000 0.000 * F:\Python\lib\logging\__init__.py:513(__init__)
3.0 0.000 0.000 0.000 0.000 * F:\Python\lib\logging\__init__.py:187(_checkLevel)
1.0 0.000 0.000 0.000 0.000 * F:\Python\lib\logging\__init__.py:842(createLock)
1.0 0.000 0.000 0.000 0.000 * F:\Python\lib\logging\__init__.py:1273(_fixupParents)
5.0 0.000 0.000 0.000 0.000 * ~:0(<method 'acquire' of '_thread.R
Results resume:
1. -9.53998 0.91016 10.45015 standardlog_fastdebug_off - logging_mod_log_debug_off = 10,000,000 iterations
最重要的是基准报告最后一行中突出显示的总体时间。它表明,在 self.is_debug 和 logging 都被禁用的情况下,经过一千万次调用后,使用 print 的版本只花费了 0.91016 秒,而使用 logging 模块的版本花费了 10.45015 秒——慢了十倍以上。
我并不打算在两者都启用的情况下把 logging 模块与 print 语句进行比较,因为 logging 模块提供了大量功能,理应比简单的 print 花费更多时间。
但是,在日志记录被禁用的情况下,我原以为它不会花费这么多时间。还是我哪里算错了?logging 版本在一千万次调用中花费了 10.45015 秒,而不是像 print 版本那样只需 0.91016 秒。
答案 0 :(得分:2)
我想这不是关于logging
模块,而是关于Python。考虑以下代码。四个简单的功能。它们之间的唯一区别是执行属性访问的次数。请参见下面的结果。
class A:
    """Root of a three-level attribute chain used by the timing functions."""

    def __init__(self):
        self.a = False      # one-level lookup target (a.a)
        self.a_b = B()      # link to the next chain level
class B:
    """Middle level of the attribute chain."""

    def __init__(self):
        self.b = False      # two-level lookup target (a.a_b.b)
        self.b_c = C()      # link to the leaf level
class C:
    """Leaf of the attribute chain."""

    def __init__(self):
        self.c = False      # three-level lookup target (a.a_b.b_c.c)
import time
# iterations per timed run; large enough to make per-lookup cost visible
N = 10_000_000
# single shared instance whose attribute chain the f_* functions traverse
a = A()
def f_0(N):
    """Time N loop iterations guarded by a constant-false test; return seconds."""
    begin = time.time()
    for _ in range(N):
        if False:
            print(111)  # never printed
    return time.time() - begin
def f_1(N):
    """Time N iterations guarded by a one-level attribute lookup (a.a)."""
    begin = time.time()
    for _ in range(N):
        if a.a:
            print(111)
    return time.time() - begin
def f_2(N):
    """Time N iterations guarded by a two-level attribute lookup (a.a_b.b)."""
    begin = time.time()
    for _ in range(N):
        if a.a_b.b:
            print(111)
    return time.time() - begin
def f_3(N):
    """Time N iterations guarded by a three-level attribute lookup (a.a_b.b_c.c)."""
    begin = time.time()
    for _ in range(N):
        if a.a_b.b_c.c:
            print(111)
    return time.time() - begin
# Run each timing function ten times and scatter-plot elapsed time against
# the attribute-chain length (0 through 3).
result = {'x': [], 'y': []}
for run in range(10):
    for position, timer in enumerate([f_0, f_1, f_2, f_3]):
        result['x'].append(str(position))
        result['y'].append(timer(N))
    print(run)

import matplotlib.pyplot as plt

plt.figure()
plt.plot(result['x'], result['y'], '.')
plt.xlabel('argument access chain length')
plt.ylabel('time, seconds')
plt.show()
结果是(Python 3.6.8 / CPython / Win10 / CPU i5-7600):
原因是众所周知的——对于 CPU 受限的问题,纯 Python 的速度很慢。例如,这也是 Python 本身并不适合做大量数值计算(number crunching)的原因。从我的经验来看,把自定义类和 pandas
换成内置字典,使我的(计算密集型)Python 代码几乎快了 10 倍。
此外,您还可以查看https://hackernoon.com/yes-python-is-slow-and-i-dont-care-13763980b5a1-适用于代码的日志记录部分。
答案 1 :(得分:1)
我真的无法复制您的结果。
如果绝对未配置日志记录模块,则它的速度非常快。
import logging
import argparse
import time
import sys
# Module-level logger; in "off" mode it is never configured, so records
# are discarded by the level check without any handler work.
log = logging.getLogger("blep")
def prepare_off():
    """No-op: leave the logging system unconfigured for the 'off' mode."""
    pass
def prepare_on():
    """Configure root logging at DEBUG level, writing to stdout."""
    logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
def step(x):
    """Emit one INFO record for iteration x using lazy %-style arguments."""
    log.info("now running step %s", x)
def main():
    """Parse CLI arguments, run the selected prepare step, and time n calls.

    Prints the per-iteration cost in nanoseconds to stderr.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-n", type=int, default=500_000)
    parser.add_argument(
        "--mode", required=True, choices=("off", "on")
    )
    args = parser.parse_args()

    # Dispatch to prepare_off / prepare_on by name.
    globals()[f"prepare_{args.mode}"]()

    start = time.perf_counter_ns()
    for x in range(args.n):
        step(x)
    elapsed = time.perf_counter_ns() - start

    print(
        vars(args),
        elapsed / args.n,
        "ns/iter",
        file=sys.stderr,
    )


if __name__ == "__main__":
    main()
$ time python3 so54470102.py --mode off >/dev/null
{'n': 500000, 'mode': 'off'} 541.53571 ns/iter
0.32 real 0.30 user 0.01 sys
$ time python3 so54470102.py --mode on >/dev/null
{'n': 500000, 'mode': 'on'} 26932.005352 ns/iter
13.52 real 13.01 user 0.47 sys
记住,在探查器下运行会产生很大的开销(当涉及更多调用时,例如在日志记录模块执行操作时,开销会更大):
$ time python3 -m cProfile -o /dev/null so54470102.py --mode on >/dev/null
{'n': 500000, 'mode': 'on'} 45733.711544 ns/iter
22.93 real 22.23 user 0.57 sys
$ time python3 -m cProfile -o /dev/null so54470102.py --mode off >/dev/null
{'n': 500000, 'mode': 'off'} 962.122468 ns/iter
0.54 real 0.52 user 0.01 sys