我正在努力提高旧功能的性能。它最初使用了许多for
块和列表追加,我认为使用numpy
数组和广播会使它更快。它没有,我的新功能比旧功能慢得多,我不确定为什么(也许它是np.append()
?)。
为什么我的新功能比旧功能慢,我怎样才能提高其性能?
import numpy as np
import time as t
def func1(aa, N1, N2, bb, c1, c2, c3):
"""
My original function, using for blocks and list appending.
"""
cc = []
for i, x in enumerate(aa[:N1]):
a1 = c1 + (bb[i][0] + bb[i][1] / c2) * c3
cc.append(x + a1)
for i, x in enumerate(aa[N1:(N1 + N2)]):
a2 = ((bb[N1 + i][0][0] + bb[N1 + i][0][1] / c2) -
(bb[N1 + i][1][0] + bb[N1 + i][1][1] / c2)) * c3
cc.append(x + a2)
for i, x in enumerate(aa[(N1 + N2):(N1 + N2 + N1)]):
a1 = c1 + (bb[i][0] + bb[i][1] / c2) * c3
cc.append(x + a1)
for i, x in enumerate(aa[(N1 + N2 + N1):(N1 + N2 + N1 + N2)]):
a2 = ((bb[N1 + i][0][0] + bb[N1 + i][0][1] / c2) -
(bb[N1 + i][1][0] + bb[N1 + i][1][1] / c2)) * c3
cc.append(x + a2)
cc = np.array(cc + list(aa[(2 * N1 + 2 * N2):]))
return cc
def func2(aa, bb, c1, c2, c3):
"""
My new function, using array broadcasting.
"""
a1 = c1 + (bb[0][:, 0] + bb[0][:, 1] / c2) * c3
a2 = ((bb[1][:, 0][:, 0] + bb[1][:, 0][:, 1] / c2) -
(bb[1][:, 1][:, 0] + bb[1][:, 1][:, 1] / c2)) * c3
a3 = np.vstack(np.hstack([a1, a2, a1, a2]))
cc = np.append(aa[:len(a3)] + a3, aa[len(a3):], axis=0)
return cc
# Data array with proper shape, as it is received by my actual code.
N1, N2 = np.random.randint(1, 10, 2)
aa = np.random.uniform(0., 1., (2 * (N1 + N2) + 6, 2000))
# I can manipulate the shape of this 'bb' array, but not the 'aa' array above.
# This is the shape I currently use for my new function.
bb = [[], []]
for _ in range(N1):
bb[0].append(np.random.uniform(-1., 1., 2))
for _ in range(N2):
bb[1].append([
np.random.uniform(-1., 1., 2), np.random.uniform(-1., 1., 2)])
bb = [np.array(_) for _ in bb]
# My old function uses a different shape for bb
bb_old = list(bb[0]) + list(bb[1])
# Time functions
tr_lst, t1, t2 = [], 0., 0.
for _ in range(10000):
c1, c2, c3 = np.random.uniform(10., 20., (3,))
# Old function
s = t.clock()
cc1 = func1(aa, N1, N2, bb_old, c1, c2, c3)
t1 += t.clock() - s
# New function
s = t.clock()
cc2 = func2(aa, bb, c1, c2, c3)
t2 += t.clock() - s
# Check that both functions are equal.
tr_lst.append((
np.all([np.allclose(x1, x2) for x1, x2 in zip(*[cc1, cc2])])))
print(np.all(tr_lst))
print("Old function: {:.2f}".format(t1))
print("New function: {:.2f}".format(t2))