这是我的代码:
def decode(filename):
with open(filename, "rb") as binary_file:
# Read the whole file at once
data = bytearray( binary_file.read())
for i in range(len(data)):
data[i] = 0xff - data[i]
with open("out.log", "wb") as out:
out.write(data)
我有一个大约10MB的文件,我需要翻转所有位来转换此文件,然后将其保存到新文件中。
使用我的代码翻译10MB文件大约需要1秒,而使用C只需不到1ms。
这是我的第一个python脚本。我不正确,如果使用字节数组。最耗时的代码是字节数组循环。
答案 0 :(得分:1)
如果可以选择使用numpy
库,那么使用它会更快 ★ ,因为它可以执行通过单个语句对所有字节进行操作。与使用numpy
之类的模块(在C中实现并针对数组处理进行优化)相比,在纯Python中对相对较大的数据量执行字节级操作本质上将相对较慢。
★,尽管在Python 2中不如在3中那么多(请参见下面的结果)。
以下是我为使用它与问题代码进行基准测试而建立的框架。看起来似乎很多代码,但是其中大多数只是进行性能比较的基础。
我鼓励其他回答此问题的人也使用它。
from __future__ import print_function
from collections import namedtuple
import os
import sys
from random import randrange
from textwrap import dedent
from tempfile import NamedTemporaryFile
import timeit
import traceback
N = 1 # Number of executions of each "algorithm".
R = 3 # Number of repetitions of those N executions.
UNITS = 1024 * 1024 # MBs
FILE_SIZE = 10 * UNITS
# Create test files. Must be done here at module-level to allow file
# deletions at end.
with NamedTemporaryFile(mode='wb', delete=False) as inp_file:
FILE_NAME_IN = inp_file.name
print('Creating temp input file: "{}", length {:,d}'.format(FILE_NAME_IN, FILE_SIZE))
inp_file.write(bytearray(randrange(256) for _ in range(FILE_SIZE)))
with NamedTemporaryFile(mode='wb', delete=False) as out_file:
FILE_NAME_OUT = out_file.name
print('Creating temp output file: "{}"'.format(FILE_NAME_OUT))
# Common setup for all testcases (executed prior to any Testcase specific setup).
COMMON_SETUP = dedent("""
from __main__ import FILE_NAME_IN, FILE_NAME_OUT
""")
class Testcase(namedtuple('CodeFragments', ['setup', 'test'])):
""" A test case is composed of separate setup and test code fragments. """
def __new__(cls, setup, test):
""" Dedent code fragment in each string argument. """
return tuple.__new__(cls, (dedent(setup), dedent(test)))
testcases = {
"user3181169": Testcase("""
def decode(filename, out_filename):
with open(filename, "rb") as binary_file:
# Read the whole file at once
data = bytearray(binary_file.read())
for i in range(len(data)):
data[i] = 0xff - data[i]
with open(out_filename, "wb") as out:
out.write(data)
""", """
decode(FILE_NAME_IN, FILE_NAME_OUT)
"""
),
"using numpy": Testcase("""
import numpy as np
def decode(filename, out_filename):
with open(filename, 'rb') as file:
data = np.frombuffer(file.read(), dtype=np.uint8)
# Applies mathematical operation to entire array.
data = 0xff - data
with open(out_filename, "wb") as out:
out.write(data)
""", """
decode(FILE_NAME_IN, FILE_NAME_OUT)
""",
),
}
# Collect timing results of executing each testcase multiple times.
try:
results = [
(label,
min(timeit.repeat(testcases[label].test,
setup=COMMON_SETUP + testcases[label].setup,
repeat=R, number=N)),
) for label in testcases
]
except Exception:
traceback.print_exc(file=sys.stdout) # direct output to stdout
sys.exit(1)
# Display results.
major, minor, micro = sys.version_info[:3]
bitness = 64 if sys.maxsize > 2**32 else 32
print('Fastest to slowest execution speeds using ({}-bit) Python {}.{}.{}\n'
'({:,d} execution(s), best of {:d} repetition(s)'.format(
bitness, major, minor, micro, N, R))
print()
longest = max(len(result[0]) for result in results) # length of longest label
ranked = sorted(results, key=lambda t: t[1]) # ascending sort by execution time
fastest = ranked[0][1]
for result in ranked:
print('{:>{width}} : {:9.6f} secs, relative speed: {:6,.2f}x, ({:8,.2f}% slower)'
''.format(
result[0], result[1], round(result[1]/fastest, 2),
round((result[1]/fastest - 1) * 100, 2),
width=longest))
# Clean-up.
for filename in (FILE_NAME_IN, FILE_NAME_OUT):
try:
os.remove(filename)
except FileNotFoundError:
pass
输出(Python 3):
Creating temp input file: "T:\temp\tmpw94xdd5i", length 10,485,760
Creating temp output file: "T:\temp\tmpraw4j4qd"
Fastest to slowest execution speeds using (32-bit) Python 3.7.1
(1 execution(s), best of 3 repetition(s)
using numpy : 0.017744 secs, relative speed: 1.00x, ( 0.00% slower)
user3181169 : 1.099956 secs, relative speed: 61.99x, (6,099.14% slower)
输出(Python 2):
Creating temp input file: "t:\temp\tmprk0njd", length 10,485,760
Creating temp output file: "t:\temp\tmpvcaj6n"
Fastest to slowest execution speeds using (32-bit) Python 2.7.15
(1 execution(s), best of 3 repetition(s)
using numpy : 0.017930 secs, relative speed: 1.00x, ( 0.00% slower)
user3181169 : 0.937218 secs, relative speed: 52.27x, (5,126.97% slower)