# For every ordered pair of features, accumulate the sum of 1 / (jj - ii)
# over all position pairs with jj > ii, and store it under a
# "<calli> <callj> distance" key.
# NOTE(review): `feats` and `seqd` are defined outside this snippet; seqd is
# presumably a mapping from each call to the list of indexes where it
# appears -- confirm against the surrounding code.
file_dict = {}
for calli, callj in itertools.product(feats, feats):
    # Skip pairs where either call has no entry in seqd.
    if calli not in seqd or callj not in seqd:
        continue
    # This was the memory hot spot: the terms used to be appended to a list
    # holding one float per (jj, ii) pair before being summed.  Feeding a
    # generator expression to sum() consumes each term as it is produced, so
    # no intermediate list is ever built.  It also removes the need for the
    # explicit `del` statements, which could raise NameError when the loop
    # body never ran.
    entry = sum(
        1. / (jj - ii)
        for jj in seqd[callj]
        for ii in seqd[calli]
        if jj - ii > 0
    )
    file_dict[str(calli) + " " + str(callj) + " distance"] = entry
我在代码中使用了上面的片段,并对多个文件逐一执行它,结果出现了类似内存泄漏的现象。如果把我标出的那个循环中的 4 行注释掉,程序的内存占用会稳定在约 100 MB;一旦取消注释,内存会迅速涨到约 8–9 GB。请帮忙!
答案 0 :(得分:0)
补充一下背景:我正在复现这篇论文第 5 页的算法,用来生成系统调用依赖图。
以下是完整的修复代码。核心改动是在 sum() 中直接使用不带方括号的推导式(即生成器表达式)。这样每一项在生成的同时就被累加,而不必先构建出整个列表……
以下是代码:
def graph_maker_dict(feats, calls, key_len=2):
    """Return inverse-distance weights between ordered pairs of calls.

    For every ordered pair ``(calli, callj)`` of features that occur in
    ``calls``, the weight is the sum of ``1 / (jj - ii)`` over all positions
    ``ii`` of ``calli`` and ``jj`` of ``callj`` with ``jj > ii``.

    Args:
        feats: Iterable of call names to consider.  Each name must support
            slicing and ``+`` with ``str`` (e.g. be a string).
        calls: Iterable of calls; a call's positions are its indexes in
            iteration order.
        key_len: Number of leading characters kept from each call name and
            from the ``" distance"`` suffix when building a result key.
            The default of 2 reproduces the historical keys (``"op re d"``).
            Pass ``None`` to keep full names; truncated keys of different
            pairs can collide, in which case the later pair overwrites the
            earlier one.

    Returns:
        dict mapping ``"<calli> <callj><suffix>"`` to the summed weight.
        Pairs with no ``jj > ii`` combination map to ``0``; features absent
        from ``calls`` produce no entry.
    """
    # Map each call to the list of indexes where it appears.
    seqd = defaultdict(list)
    for pos, call in enumerate(calls):
        seqd[call].append(pos)

    # Only features that actually occur can contribute.  Filtering once here
    # replaces a membership test per pair and also lets `feats` be a
    # one-shot iterator.
    present = [feat for feat in feats if feat in seqd]

    suffix = " distance"[:key_len]
    file_dict = {}
    for calli, callj in itertools.product(present, repeat=2):
        # Generator expression: terms are summed as they are produced, so no
        # list of len(seqd[callj]) * len(seqd[calli]) floats is ever built.
        entry = sum(
            1. / (jj - ii)
            for jj in seqd[callj]
            for ii in seqd[calli]
            if jj - ii > 0
        )
        file_dict[calli[:key_len] + " " + callj[:key_len] + suffix] = entry
    return file_dict