def getneighbors(cpdlist,rxnid):
tmplist2 = []
for cpd in cpdlist:
#tmp = [x for x in self.traindf if cpd in x[1]]
#tmplist = [x[1] for x in tmp if rxnid != x[0]]
f = np.frompyfunc(lambda x: cpd in x,1,1)
tmp = traindf[np.where(f(self.traindf))[0]]
tmplist2 = tmplist2 + [x[1] for x in tmp if rxnid != x[0]]
rloc = [x.split('/') for x in tmplist2]
flat2 = list(itertools.chain.from_iterable(rloc2))
traindf看起来像
array([['RXN-A','WATER/Glucopyranose',
'RXN-B','1-3-beta-D-Glucans',
'RXN-C','WATER/HYDROGEN-PEROXIDE'
'RXN-D','HYDROGEN-PEROXIDE'
'RXN-E','Glucopyranose/NADPH']]
cpdlist = ['WATER','Glucopyranose']
rxnid ='RXN-A'
tmplist2为['WATER / HYDROGEN-PEROXIDE','Glucopyranose / NADPH']
flat2为['WATER','HYDROGEN-PEROXIDE','Glucopyranose','NADPH']
问题: 上面的代码可以正常工作,但是要花40秒钟,因为我在大循环的每次迭代中都递归地遍历了此函数。有什么办法可以优化上面的功能,以便我的程序运行更快?我已经得出结论,这部分是瓶颈。
我也是numpy的新手,因为pandas str.contains对此太慢了。任何建议将不胜感激!