我对python和numpy的世界都很陌生,我需要一些关于编码的帮助:
两个数组中的“索引”列都是 datetime64 类型。我想在数组 N 中找出时间与数组 R 中各个时间相匹配的所有行,并把匹配到的内容与 R 合并成最终的数组(例如 R2)。
让问题变复杂的是:R 中的时间有时在数组 N 中找不到完全相同的匹配,所以我建立了 RLookUp,它包含所有可能/允许的时间(越接近原时间的匹配越好)。
我尝试过映射,但我不是很成功。
非常感谢任何有关解决此问题的帮助。
import numpy as np
#######################################
### CONST / VAR / SETTINGS
laMinutes = 2  # look-ahead window, in minutes
lbMinutes = 0  # look-back window, in minutes

# Reference rows in chronological order: [time, value, value].
# dtype=object is spelled out because every row mixes datetime64 and floats.
N = np.array([
    [np.datetime64('2013-05-02 10:00:00', 'm'), 1.11, 1.111],
    [np.datetime64('2013-05-02 10:30:00', 'm'), 2.22, 2.222],
    [np.datetime64('2013-05-02 11:02:00', 'm'), 3.33, 3.333],
    [np.datetime64('2013-05-02 11:30:00', 'm'), 4.44, 4.444],
    [np.datetime64('2013-05-02 12:05:00', 'm'), 5.55, 5.555],
], dtype=object)
#print(N)

# Query rows, newest first: [id, time] with minute resolution.
R = np.array([
    [555, np.datetime64('2013-05-02 12:00')],
    [333, np.datetime64('2013-05-02 11:00')],
    [111, np.datetime64('2013-05-02 10:00')],
], dtype=object)
#print(R)

#######################################
### MAIN
# Typed copy of the N time column so searchsorted compares datetimes natively.
# searchsorted requires this column to be sorted ascending (N is chronological).
timesN = N[:, 0].astype('datetime64[m]')

# Every R time shifted by -lbMinutes..+laMinutes is a candidate lookup time.
# Candidates are gathered in a list and turned into an array once, because
# np.append copies the whole array on every call.
candidates = []
for timeBase in R[:, 1]:
    for offset in range(-lbMinutes, laMinutes + 1):
        timeR = timeBase + np.timedelta64(offset, 'm')
        # searchsorted returns an insertion index, not a match, so the row at
        # that index must be compared with timeR before reporting a hit.
        # The side argument is spelled out: only 'left'/'right' are documented.
        rowN = np.searchsorted(timesN, timeR, side='left')
        if rowN < len(timesN) and timesN[rowN] == timeR:
            print(timeR, ' : ', rowN)
        else:
            print(timeR, ' : ', 'NOT FOUND')
        # Recorded whether or not N has a row at this time: RLookUp is the
        # list of all allowed times, not only the ones present in N.
        candidates.append(timeR)

# Column vector (one candidate time per row), sorted ascending.
RLookUp = np.array(candidates, dtype='<M8[m]').reshape(-1, 1)
#TODO: sorting necessary ?
RLookUp = np.sort(RLookUp, axis=0)
print('\n RLookUp: \n', RLookUp)

# Abandoned draft of the join step, kept for reference.
# It will not work: some RLookUp times do not exist in N.
#mapRLookUp = dict( zip(RLookUp[:,0], (range(len(RLookUp)))))
#print('\n mapRLookUp: \n', mapRLookUp)
#N2 = np.array([ N[mapRLookUp[key], 0:3] for key in RLookUp[:,1] ])
#print('\n N2: \n', N2)
#R2 = np.hstack((R, N2))
#print('\n R2: \n', R2)
答案 0 :(得分:0)
用循环实现起来更快;如果有人知道如何用 numpy 的向量化写法实现同样的功能,欢迎补充。
import numpy as np
#######################################
### CONST / VAR / SETTINGS
laMinutes = 10  # look-ahead window, in minutes
lbMinutes = 0   # look-back window, in minutes

# Reference rows: [time, value, value].
# dtype=object is spelled out because every row mixes datetime64 and floats.
N = np.array([
    [np.datetime64('2013-05-02 10:00:00', 'm'), 1.11, 1.111],
    [np.datetime64('2013-05-02 10:02:00', 'm'), 2.22, 2.222],
    [np.datetime64('2013-05-02 10:03:00', 'm'), 3.33, 3.333],
    [np.datetime64('2013-05-02 11:05:00', 'm'), 4.44, 4.444],
    [np.datetime64('2013-05-02 12:10:00', 'm'), 5.55, 5.555],
], dtype=object)
# Reorder whole rows by time. np.sort(N, axis=0) would sort every column
# independently and break the link between a time and its values.
N = N[np.argsort(N[:, 0].astype('datetime64[m]'), kind='stable')]
#print(N)

# Query rows: [id, time] with minute resolution.
R = np.array([
    [555, np.datetime64('2013-05-02 12:00')],
    [333, np.datetime64('2013-05-02 10:00')],
    [111, np.datetime64('2013-05-02 11:00')],
], dtype=object)
# Same row-wise reordering, keyed on the time column, so each id keeps its time.
R = R[np.argsort(R[:, 1].astype('datetime64[m]'), kind='stable')]
#print(R)

#######################################
### MAIN
# Typed, sorted copy of the N time column for binary-search window lookups.
timesN = N[:, 0].astype('datetime64[m]')
lookAhead = np.timedelta64(laMinutes, 'm')
lookBack = np.timedelta64(lbMinutes, 'm')

# One output row per R row that has at least one N row inside
# [time - lbMinutes, time + laMinutes]:
# (R time, matched N time, N value, N value, R id).
matched = []
for idR, timeR in R:
    first = np.searchsorted(timesN, timeR - lookBack, side='left')
    last = np.searchsorted(timesN, timeR + lookAhead, side='right')
    if first == last:
        continue  # no N row falls inside the window for this R row
    # Pick the N row closest to the R time. With lbMinutes == 0 this is the
    # earliest row in the window, i.e. the first match of a forward scan.
    rowN = first + np.argmin(np.abs(timesN[first:last] - timeR))
    matched.append((timeR, N[rowN, 0], N[rowN, 1], N[rowN, 2], idR))

# R is iterated in time order, so RN is already ordered by R time; a
# per-column np.sort here would scramble the rows. reshape keeps the
# (0, 5) shape when nothing matched.
RN = np.array(matched, dtype=object).reshape(-1, 5)
print (RN)