我正在尝试在DIC(数字图像相关性)结果中实现缺失点,由于材料(照片)被破坏而导致缺失。
我想在缺少该区域的地方放置点(无值)。 (源数据中的本机点)
此代码是我论文的一部分,我想找出由于拉伸试验中材料破坏而遗漏的那些点。来自DIC(数字图像相关)方法的数据,该方法测量样品表面的应变。当样品遭受局部破坏时,DIC软件将无法找到该区域的像素簇并最终丢失点。我有30多个标本,每个标本50帧。此数据的总计算时间约为一周。在版本1中显示的代码如下-我的计算机上没有任何更改可以运行约4分钟。版本2中的代码缩短了:从头到尾的语句MARK1被注释,版本2的注释行未注释,但仍需要3分45秒。
输入数据: https://github.com/MarekSawicki/data/blob/master/022_0_29-03-2018_e11_45.csv
import numpy as np
import os
# changing of folder
os.chdir("D:/Marek/doktorat/Badania_obrobione/test")
# load data from file
data = np.genfromtxt('022_0_29-03-2018_e11_45.csv', delimiter=',',dtype='float64')
# separation of coordintates (points) and values (both floats64)
# data in format: list of points (X,Y) and list of values
points = data[1:,1:3]
values = data[1:,4]
#shifting coordinates to zero (points coordinates might be negative or offset from 0) (-x0)
points[:,0] -= min(points[:,0])
points[:,1] -= min(points[:,1])
#scale factor K_scale
k_scale=2
points[:,0:2] *= k_scale
# vector reshape
values= np.reshape(values, [len(data)-1,1])
# sort the points to keep order in X direction
# points X are assumed as points[:,0]
# points Y are assumed as points[:,1]
array1 = np.ascontiguousarray(points)
a_view = array1.view(dtype=[('', array1.dtype)]*array1.shape[-1])
a_view.sort(axis=0)
points_sorted = array1
# Start of processing points
# a and b are respectively X and Y limits
a = np.int32(np.ceil(np.max(points[:,0])))+1
b = np.int32(np.ceil(np.max(points[:,1])))+1
# length 1 unit array cluster
array2=np.empty((0,2))
for m in range(0,3):
for n in range(0,3):
array2=np.append(array2,[[m*.5,n*.5]],axis=0)
# initialization of variables
k=0 # searching limits
bool_array_del=np.zeros((9,1), dtype=bool) # determine which line should be deleted - bool type
# array4 is a container of values which meets criteria
array4=np.empty((0,2))
# array7 is output container
array7=np.empty((0,2))
# main loop of concerned code:
for i in range(0,a): # X wise loop, a is a X limit
for i2 in range(0,b): # Y wise loop, a is a Y limit
array3 = np.copy(array2) # creating a cluster in range (i:i+1,i2:i2+1, step=0.5)
array3[:,0]+=i
array3[:,1]+=i2
# value container (each loop it should be cleaned)
array4=np.empty((0,2))
# container which determine data to delete (each loop it should be cleaned)
bool_array_del = np.empty((0,1),dtype=bool)
k=0 # set zero searching limits
# loop for searching points which meet conditions.
# I think it is the biggest time waster
#To make it shorter I deal with sorted points which allows me
#to browse part of array insted of whole array
#(that is why I used k parameter and if-break condition )
for i3 in range(k,points_sorted.shape[0]):
valx = points_sorted[i3,0]
valy = points_sorted[i3,1]
if valx>i-1:
k=i3
if valx>i+1.5:
break
#this condition check does considered point has X and coordinates is range : i-0.5:i+1.5
# If yes then append this coordinate to empty container (array4)
if np.abs(valx-(i+.5))<=1:
if np.abs(valy-(i2+.5))<=1:
array4=np.append(array4,[[valx,valy]],axis=0)
# (version 2) break
# Then postprocessing of selected points container - array4. To determine - do all point out of array4 should are close enough to be deleted?
if array4.shape[0]!=0:
# (version 2) pass
# begin(MARK1)
# round the values from array4 to neares .5 value
array5 = np.round(array4*2)/2
# if value from array5 are out of bound for proper cluster values then shift it to the closest correct value
for i4 in range(0,array5.shape[0]):
if array5[i4,0]>i+1:
array5[i4,0]= i+1
elif array5[i4,0]<i:
array5[i4,0]=i
if array5[i4,1]>i2+1:
array5[i4,1]=i2+1
elif array5[i4,1]<i2:
array5[i4,1]=i2
# substract i,i2 vector and double from value of array5 to get indices which should be deleted
array5[:,0]-=i
array5[:,1]-=i2
array5*=2
# create empty container with bool values - True - delete this value, False - keep
array_bool1=np.zeros((3,3), dtype=bool)
for i5 in range(0,array5.shape[0]):
# below condition doesn't work - it is too rough
#array_bool1[int(array5[i5,0]),int(array5[i5,1])]=True
# this approach works with correct results but I guess it is second the biggest time waster.
try:
array_bool1[int(array5[i5,0]),int(array5[i5,1])]=True
array_bool1[int(array5[i5,0]+1),int(array5[i5,1]-1)]=True
array_bool1[int(array5[i5,0]+1),int(array5[i5,1])+1]=True
array_bool1[int(array5[i5,0]+1),int(array5[i5,1])]=True
array_bool1[int(array5[i5,0]-1),int(array5[i5,1]+1)]=True
array_bool1[int(array5[i5,0]-1),int(array5[i5,1]-1)]=True
array_bool1[int(array5[i5,0]-1),int(array5[i5,1])]=True
array_bool1[int(array5[i5,0]),int(array5[i5,1]+1)]=True
array_bool1[int(array5[i5,0]),int(array5[i5,1]-1)]=True
except:
pass
# convert bool array to list
for i6 in range(0,array_bool1.shape[0]):
for i7 in range(0,array_bool1.shape[1]):
bool_array_del=np.append(bool_array_del, [[array_bool1[i6,i7]]],axis=0)
# get indices where bool list (unfotunatelly called bool_array_del) is true
result= np.argwhere(bool_array_del)
array6=np.delete(array3,result[:,0],axis=0)
# append it to output container
array7=np.append(array7,array6,axis=0)
# if nothing is found in loop for searching points which meet conditions append full cluster to output array
# end(MARK1)
else:
array7=np.append(array7,array3,axis=0)
此代码为我提供了版本1(图3)的令人满意的结果和版本2(图4)的可接受的结果
我是python和numpy的新手。你能告诉我我可以做些什么来加快我的代码的速度吗?我考虑过改用熊猫
答案 0 :(得分:1)
实际上,Jordan Singer提出了一种非常方便的解决方案-通过numba使用JIT编译。
时间减少从4分钟到4-5秒。另一方面,我被迫不使用np.append
,但是我使用了定长零数组,然后从数组中删除了零。
以下重做的代码:
import numpy as np
import matplotlib.pyplot as plt
import os
from numba import jit
import time
start = time.time()
# creating array4 from previous code changed to function with JIT decorator
@jit(nopython=True)
def Numba_Function_array4(if1,if2,source):
k=0 # set zero searching limits
index=0 # browse index for output array
rarray=np.zeros((10,2)) # fixed length output array
# loop for searching points which meet conditions.
# I think it is the biggest time waster - in fact is!
#To make it shorter I deal with sorted points which allows me
#to browse part of array insted of whole array
#(that is why I used k parameter and if-break condition )
for i3 in range(k,source.shape[0]):
valx = source[i3,0]
valy = source[i3,1]
if valx>if1-1:
k=i3
if valx>if1+1.5:
break
#this condition check does considered point has X and coordinates is range : i-0.5:i+1.5
# If yes then append this coordinate to empty container (array4)
if np.abs(valx-(if1+.5))<=1:
if np.abs(valy-(if2+.5))<=1:
rarray[index,:] = [valx,valy]
index+=1
return rarray
# changing of folder
os.chdir("D:/Marek/doktorat/Badania_obrobione/test")
cwd = os.getcwd()
# load data from file
data = np.genfromtxt('022_0_29-03-2018_e11_45.csv', delimiter=',',dtype='float64')
# separation of coordintates (points) and values (both floats64)
# data in format: list of points (X,Y) and list of values
points = data[1:,1:3]
values = data[1:,4]
#shifting coordinates to zero (points coordinates might be negative or offset from 0) (-x0)
points[:,0] -= min(points[:,0])
points[:,1] -= min(points[:,1])
#scale factor K_scale
ks=2
points[:,0:2] *= ks
# vector reshape
values= np.reshape(values, [len(data)-1,1])
# sort the points to keep order in X direction
# points X are assumed as points[:,0]
# points Y are assumed as points[:,1]
array1 = np.ascontiguousarray(points)
a_view = array1.view(dtype=[('', array1.dtype)]*array1.shape[-1])
a_view.sort(axis=0)
Input_points_sorted = array1
# Start of processing points
# a and b are respectively X and Y limits
a = np.int32(np.ceil(np.max(points[:,0])))+1
b = np.int32(np.ceil(np.max(points[:,1])))+1
# length 1 unit array cluster (step 0.5)
array2=np.empty((0,2))
for m in range(0,3):
for n in range(0,3):
array2=np.append(array2,[[m*.5,n*.5]],axis=0)
# array7 is output container
array7=np.empty((0,2))
# main loop of concerned code:
for i in range(0,a): # X wise loop, a is a X limit
for i2 in range(0,b): # Y wise loop, a is a Y limit
array3 = np.copy(array2) # creating a cluster in range (i:i+1,i2:i2+1, step=0.5)
array3[:,0]+=i
array3[:,1]+=i2
# function which contail the most time consuming part of the code
array4 = Numba_Function_array4(i,i2,Input_points_sorted)
# container which determine data to delete (each loop it should be cleaned)
bool_array_del = np.empty((0,1),dtype=bool)
# because in function umba_Function_array4 I created fixed length array I have to delete from the end zeros which indicates empty index
for i8 in range(9,-1,-1):
if array4[i8,0]==0:
array4=np.delete(array4,i8,axis=0)
# Then postprocessing of selected points container - array4. To determine - do all point out of array4 should are close enough to be deleted?
if array4.shape[0]!=0:
# round the values from array4 to neares .5 value
array5 = np.round(array4*2)/2
# if value from array5 are out of bound for proper cluster values then shift it to the closest correct value
for i4 in range(0,array5.shape[0]):
if array5[i4,0]>i+1:
array5[i4,0]= i+1
elif array5[i4,0]<i:
array5[i4,0]=i
if array5[i4,1]>i2+1:
array5[i4,1]=i2+1
elif array5[i4,1]<i2:
array5[i4,1]=i2
# substract i,i2 vector and double from value of array5 to get indices which should be deleted
array5[:,0]-=i
array5[:,1]-=i2
array5*=2
# create empty container with bool values - True - delete this value, False - keep
array_bool1=np.zeros((3,3), dtype=bool)
for i5 in range(0,array5.shape[0]):
try:
array_bool1[int(array5[i5,0]),int(array5[i5,1])]=True
array_bool1[int(array5[i5,0]+1),int(array5[i5,1]-1)]=True
array_bool1[int(array5[i5,0]+1),int(array5[i5,1])+1]=True
array_bool1[int(array5[i5,0]+1),int(array5[i5,1])]=True
array_bool1[int(array5[i5,0]-1),int(array5[i5,1]+1)]=True
array_bool1[int(array5[i5,0]-1),int(array5[i5,1]-1)]=True
array_bool1[int(array5[i5,0]-1),int(array5[i5,1])]=True
array_bool1[int(array5[i5,0]),int(array5[i5,1]+1)]=True
array_bool1[int(array5[i5,0]),int(array5[i5,1]-1)]=True
except:
pass
# convert bool array to list
for i6 in range(0,array_bool1.shape[0]):
for i7 in range(0,array_bool1.shape[1]):
bool_array_del=np.append(bool_array_del,[[array_bool1[i6,i7]]],axis=0)
# get indices where bool list (unfotunatelly called bool_array_del) is true
result= np.argwhere(bool_array_del)
array6=np.delete(array3,result[:,0],axis=0)
# append it to output container
array7=np.append(array7,array6,axis=0)
# if nothing is found in loop for searching points which meet conditions append full cluster to output array
else:
array7=np.append(array7,array3,axis=0)
end = time.time()
print("Elapsed (after compilation) = %s" % (end - start))
print("Done!")
再次感谢约旦,问题已解决!