我正在实施Kosaraju的两遍算法,该算法可以计算有向图中的强连通分量。
我可以使用小输入数据获得正确的结果,但是当输入数据较大时
(70M txt,警告!!此文本文件的大小接近70M
,使用带有此URL的下载软件下载此大文件。如果您没有下载软件,可以在浏览器http://pan.baidu.com/s/1i5Hmf5N
中复制此网址
并下载它,</ p>
大约1小时后显示“pythonw.exe已停止工作”。 Python应该运行以获得正确的答案。
我该如何解决?是否有一些记忆问题?请帮我一个忙。
Here is the large data result:
我的代码在这里:
import time
import datetime
import sys
start = time.time()
print datetime.datetime.now()
with open('test.txt') as f:
#SCC
#a = [[int(x) for x in ln.split()] for ln in f]
data_set_u = []
data_set_v = []
for ln in f:
#print ln
#print type(ln)
#print len(ln)
if len(ln) >1:
u,v = ln.split()
u = int(u)
v = int(v)
data_set_u.append(u)
data_set_v.append(v)
f.close()
print 'open file time: '+ str(time.time() - start) + 's'
print datetime.datetime.now()
sys.setrecursionlimit((max(data_set_u+data_set_v)+ len(data_set_u))*100)
def DFS_Loop():
num = max(data_set_u+data_set_v)
start_time_DFS_Loop = time.time()
global t
t = 0
global s
s = None
global visited
visited = [False]* num
global leader
leader = [None] * num
global f
f = [None] * num
for i in range(num,0,-1):
#print i
#print (i in visited)
#if (i in visited)==False:
if visited[i-1] == False:
s = i
#print s
DFS(i)
print 'end with func DFS_Loop() time: '+ str(time.time() - start_time_DFS_Loop)+ 's'
print 'end with func DFS_Loop() whole time: '+ str(time.time() - start)+ 's'
#print data_set_u
#print data_set_v
def DFS(node):
start_time_DFS = time.time()
global t
visited[node-1] = True
#print visited
#print visited
leader[node-1] = s
#print leader
arc = []
arc = [data_set_v[i] for i,x in enumerate(data_set_u) if x==node]
#print arc
for i in arc:
#print arc
#print i
if visited[i-1]==0:
#print i
DFS(i)
t+=1
#print t
f[node-1] = t
#print f
print 'end with func DFS time: '+ str(time.time() - start_time_DFS)+ 's'
print 'end with func DFS whole time: '+ str(time.time() - start)+ 's'
DFS_Loop()
print 'DFS_Loop time: '+ str(time.time() - start)+ 's'
##reverse tail and head data
##
##
rev_u,rev_v = data_set_v,data_set_u
new_u = [None] * (len(rev_u))
new_v = [None] * (len(rev_v))
#print rev_v
#print rev_u
for i,val in enumerate(f):
#rev_u[rev_u.index(i+1)] = val
#print i+1,val
#rev_v[rev_v.index(i+1,0,len(rev_v))] = val
#print rev_v
#print i,val
for i_v,val_v in enumerate(rev_v):
if val_v == i+1:
#print val_v
new_v[i_v] = val
for i_u,val_u in enumerate(rev_u):
if val_u == i+1:
#print i_u,val_u
new_u[i_u] = val
#print new_u
#print new_v
data_set_u = new_u
data_set_v = new_v
#print data_set_u
#print data_set_v
print 'reverse data time: '+ str(time.time() - start)+ 's'
DFS_Loop()
print 'DFS_Loop time: '+ str(time.time() - start)+ 's'
#print leader
##calculate repeated times appearancing in leader list
##
##
count_list = [0]*len(leader)
indices = [0]*len(leader)
#for i_lea,val_lea in enumerate(leader):
i_count_list = 0
while len(leader) > 0:
#print i_lea,val_lea
count_list[i_count_list] = leader.count(leader[0])
#print 'count_list: '+ str(count_list)
indices = [i for i, x in enumerate(leader) if x == leader[0]]
#print 'indices: '+ str(indices)
for i in xrange(len(indices)):
#print 'leader before del: '+ str(leader)
del leader[leader.index(leader[0])]
#print 'leader after del: '+ str(leader)
#print 'leader: '+ str(leader)
i_count_list = i_count_list+1
#print 'i_count_list: ' + str(i_count_list)
print 'calc time: '+ str(time.time() - start)+ 's'
sorted_count_list = sorted(count_list, key=int, reverse=True)
print sorted_count_list[0:5]
print datetime.datetime.now()
这是小测试文件:
1 4
2 8
3 6
4 7
5 2
6 9
7 1
8 5
8 6
9 7
9 3
以下是小测试文件的正确部分结果:
calc time: 0.121000051498s
[3, 3, 3, 0, 0]
2017-01-19 08:07:44.802000