我有一个嵌套列表,其中包含不同的列表大小和类型。
def read(f,tree,objects):
Event=[]
for o in objects:
#find different features of one class
temp=[i.GetName() for i in tree.GetListOfBranches() if i.GetName().startswith(o)]
tempList=[] #contains one class of objects
for t in temp:
#print t
tempList.append(t)
comp=np.asarray(getattr(tree,t))
tempList.append(comp)
Event.append(tempList)
return Event
def main():
path="path/to/file"
objects= ['TauJet', 'Jet', 'Electron', 'Muon', 'Photon', 'Tracks', 'ETmis', 'CaloTower']
f=ROOT.TFile(path)
tree=f.Get("RecoTree")
tree.GetEntry(100)
event=read(f,tree,objects)
例如事件[0]的结果是
['TauJet', array(1), 'TauJet_E', array([ 31.24074173]), 'TauJet_Px', array([-28.27997971]), 'TauJet_Py', array([-13.18042469]), 'TauJet_Pz', array([-1.08304048]), 'TauJet_Eta', array([-0.03470514]), 'TauJet_Phi', array([-2.70545626]), 'TauJet_PT', array([ 31.20065498]), 'TauJet_Charge', array([ 1.]), 'TauJet_NTracks', array([3]), 'TauJet_EHoverEE', array([ 1745.89221191]), 'TauJet_size', array(1)]
如何将其转换为numpy数组?
注1:np.asarray(事件,“对象”)很慢。我正在寻找更好的方法。另外np.fromiter()不适用,因为我没有固定类型
注意2:我不知道我的活动的长度。
注3:如果让事情变得更容易,我也可以获得名字。
答案 0 :(得分:1)
你可以尝试这样的事情,但我不确定它的速度有多快。这会为第一行创建一个numpy记录数组。
data = event[0]
keys = data[0::2]
vals = data[1::2]
#there are some zero-rank arrays in there, so need to check for those,
#but I think just recasting them to a np.float should work.
temp = [np.float(v) for v in vals]
#you could also just create a np array from the line above with np.array(temp)
dtype={"names":keys, "formats":("f4")*len(vals)}
myArr = np.rec.fromarrays(temp, dtype=dtype)
#test it out
In [53]: data["TauJet_Pz"]
Out[53]: array(-1.0830404758453369, dtype=float32)
#alternatively, you could try something like this, which just creates a 2d numpy array
vals = np.array([[np.float(v) for v in row[1::2]] for row in event])
#now create a nice record array from that using the dtypes above
myRecordArray = np.rec.fromarrays(vals, dtype=dtype)