我制作了一个简短的示例程序来测试与线程模块结合的numpy函数。因此,我用不同大小的随机数组调用了np.dot函数。
总体结果基本符合预期:函数 test_serial 比函数 test_thread 快一点(可能是因为开销更少?)。
尽管如此,我不明白为什么多线程中的点积运算是一个接一个地执行,而不是同时执行。我已确认内存使用不是问题。
我期望的是各线程中的点积运算同时(并行)执行。
似乎 numpy 一次只能执行一个点积运算。有人可以确认吗?进一步的测试表明,np.multiply 运算并不存在这种情况。
np.dot 的输出:
--- start threaded test ---
--- start threads ---
start: 0
start: 1
start: 2
start: 3
start: 4
--- start threads done ---
-8.263092041015625
--- end --- 0
--- join thread ---
-16.34356927871704
--- end --- 1
--- join thread ---
-24.071669578552246
--- end --- 2
--- join thread ---
-23.95636224746704
--- end --- 3
--- join thread ---
-23.962880611419678
--- end --- 4
--- join thread ---
-24.32594323158264
--- end threaded test ---
np.multiply 的输出:
--- start threaded test ---
--- start threads ---
start: 0
start: 1
start: 2
start: 3
start: 4
--- start threads done ---
-0.007001638412475586
--- end --- 3
-0.007020235061645508
--- end --- 4
-0.3390059471130371
--- end --- 1
-0.3525271415710449
--- end --- 2
-0.3660862445831299
--- end --- 0
--- join thread ---
--- join thread ---
--- join thread ---
--- join thread ---
--- join thread ---
-0.36977648735046387
--- end threaded test ---
import gc
import time
from threading import Thread, Lock

import numpy as np

print_access = Lock()
def test_f1(arg_1, arg_2, out, num):
    """Time a single ``np.dot(arg_1, arg_2, out)`` call and print the result.

    Args:
        arg_1: First operand passed to ``np.dot``.
        arg_2: Second operand passed to ``np.dot``.
        out: Array passed to ``np.dot`` as its output argument.
        num: Label printed with the start/end messages to identify the call.
    """
    # print_access keeps the console lines of concurrent callers from
    # interleaving.
    with print_access:
        print("start:", num)

    started = time.perf_counter()
    np.dot(arg_1, arg_2, out)
    # Subtract the start from "now" so the duration is positive, and take
    # the reading before waiting on the print lock so that wait is excluded.
    elapsed = time.perf_counter() - started

    with print_access:
        print(elapsed)
        print("--- end --- ", num)
def test_f2(arg_1, arg_2, out, num):
    """Time a single ``np.multiply(arg_1, arg_2, out)`` call and print it.

    Args:
        arg_1: First operand passed to ``np.multiply``.
        arg_2: Second operand passed to ``np.multiply``.
        out: Array passed to ``np.multiply`` as its output argument.
        num: Label printed with the start/end messages to identify the call.
    """
    # print_access keeps the console lines of concurrent callers from
    # interleaving.
    with print_access:
        print("start:", num)

    started = time.perf_counter()
    np.multiply(arg_1, arg_2, out)
    # Subtract the start from "now" so the duration is positive, and take
    # the reading before waiting on the print lock so that wait is excluded.
    elapsed = time.perf_counter() - started

    with print_access:
        print(elapsed)
        print("--- end --- ", num)
def test_f3(arg_1, arg_2, out, num):
    """Time a single ``np.matmul(arg_1, arg_2, out)`` call and print it.

    Args:
        arg_1: First operand passed to ``np.matmul``.
        arg_2: Second operand passed to ``np.matmul``.
        out: Array passed to ``np.matmul`` as its output argument.
        num: Label printed with the start/end messages to identify the call.
    """
    # print_access keeps the console lines of concurrent callers from
    # interleaving.
    with print_access:
        print("start:", num)

    started = time.perf_counter()
    np.matmul(arg_1, arg_2, out)
    # Subtract the start from "now" so the duration is positive, and take
    # the reading before waiting on the print lock so that wait is excluded.
    elapsed = time.perf_counter() - started

    with print_access:
        print(elapsed)
        print("--- end --- ", num)
def test_serial(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8,
                arg_9, arg_10, out_1, out_2, out_3, out_4, out_5):
    """Run the five ``test_f2`` jobs one after another in the calling thread.

    Consecutive ``arg_*`` parameters form the operand pairs
    (``arg_1``/``arg_2``, ``arg_3``/``arg_4``, ...); ``out_k`` is the output
    array for the k-th pair. Jobs are labelled 0 through 4 in call order.
    """
    jobs = (
        (arg_1, arg_2, out_1),
        (arg_3, arg_4, out_2),
        (arg_5, arg_6, out_3),
        (arg_7, arg_8, out_4),
        (arg_9, arg_10, out_5),
    )
    for label, (lhs, rhs, dest) in enumerate(jobs):
        test_f2(lhs, rhs, dest, label)
def test_thread(arg_1, arg_2, arg_3, arg_4, arg_5, arg_6, arg_7, arg_8,
                arg_9, arg_10, out_1, out_2, out_3, out_4, out_5):
    """Run the five ``test_f2`` jobs concurrently, one thread per job.

    Consecutive ``arg_*`` parameters form the operand pairs
    (``arg_1``/``arg_2``, ``arg_3``/``arg_4``, ...); ``out_k`` is the output
    array for the k-th pair. All threads are created, then all are started,
    then each is joined in creation order with a progress line after every
    join.
    """
    jobs = (
        (arg_1, arg_2, out_1, 0),
        (arg_3, arg_4, out_2, 1),
        (arg_5, arg_6, out_3, 2),
        (arg_7, arg_8, out_4, 3),
        (arg_9, arg_10, out_5, 4),
    )
    workers = [Thread(target=test_f2, args=job) for job in jobs]

    print("--- start threads ---")
    for worker in workers:
        worker.start()
    print("--- start threads done ---")

    for worker in workers:
        worker.join()
        print("--- join thread ---")
if __name__ == "__main__":
    # Benchmark driver: run the same five operations first serially and then
    # with one thread per operation, printing the elapsed time of each run.
    x = 10000
    y = 1000

    # Ten operands in the positional order test_serial/test_thread expect:
    # three x-by-x pairs followed by two y-by-y pairs.
    operands = [np.random.rand(n, n) for n in (x,) * 6 + (y,) * 4]
    # One preallocated output array per operand pair, with the pair's shape.
    outputs = [np.random.rand(n, n) for n in (x,) * 3 + (y,) * 2]

    print("--- start ---")
    started = time.perf_counter()
    test_serial(*operands, *outputs)
    # Subtract the start from "now" so the reported duration is positive.
    print(time.perf_counter() - started)
    print("--- end ---")

    # Short pause and an explicit collection between the two runs
    # (presumably so the serial run does not disturb the threaded timing).
    time.sleep(0.5)
    gc.collect()
    time.sleep(0.5)

    print("--- start ---")
    started = time.perf_counter()
    test_thread(*operands, *outputs)
    print(time.perf_counter() - started)
    print("--- end ---")
```