能否创建一个线程,持续将源文件 text.txt 中的行填入队列,同时在 tagdict 的大小等于 tagreset 的值时,将结果写入输出文件 tagfile.csv?
# Shared map of matched tag text -> offset of the match within its line.
# It is filled by worker() and flushed to tagfile.csv by the loop below.
tagdict = {}
# Flush threshold: once tagdict holds at least this many entries it is
# appended to tagfile.csv and reset.
tagreset = 10
# NOTE(review): in the visible code the worker threads are only started after
# this loop has finished, so tagdict stays empty while the file is being read
# unless something else populates it -- confirm the intended ordering.
with open('text.txt') as f:
    for line in f:
        # ">=" rather than "==": several entries can be added between two
        # checks, and an exact-equality test would then never fire again.
        if len(tagdict) >= tagreset:
            # The context manager closes the output file even if a write fails.
            with open("tagfile.csv", "a") as tagfile:
                for key, start in tagdict.items():
                    # Offsets are ints; format them explicitly, because
                    # concatenating str and int raises TypeError.
                    tagfile.write(
                        ':{0}"\n"{1}","{2}'.format(int(start) - 1, key, start)
                    )
            tagdict = {}
        # Hand every input line to the workers through the shared queue.
        q.put(line)
def worker(queue):
    """Drain *queue* and record every ``<tag>...</tag>`` match in ``tagdict``.

    Each item taken from *queue* is scanned as a line of text.  For every
    match, the full matched text (tags included) becomes a key in the
    module-level ``tagdict`` and the match's start offset within the line
    becomes its value.  The function returns once the queue is empty.

    Relies on the module-level names ``re``, ``Queue``, ``mutex`` and
    ``tagdict`` being defined elsewhere in the file.

    Args:
        queue: Queue-like object whose ``get_nowait()`` raises
            ``Queue.Empty`` when no item is available.
    """
    # The raw string keeps the pattern text identical to the original while
    # avoiding invalid string escapes such as "\<".
    tag_pattern = re.compile(r'\<tag\>(.*?)\<\/tag\>')
    while True:
        try:
            # Non-blocking get: it raises Queue.Empty once the queue is
            # drained, which is what lets this loop terminate.
            line = queue.get_nowait()
        except Queue.Empty:
            break

        found = False
        for match in tag_pattern.finditer(line):
            found = True
            # Hold the lock only while mutating the shared dict so the other
            # workers can keep scanning their own lines in the meantime.
            mutex.acquire()
            try:
                tagdict[match.group(0)] = match.start()
            finally:
                mutex.release()
        if not found:
            # finditer() yields nothing (it does not raise) when a line
            # contains no tag, so the condition is reported explicitly.
            print("no title matches found")
# Number of worker threads that consume lines from the shared queue.
thread_count = 5
# Keep a handle on every thread; the original discarded them, so nothing
# could ever wait for the workers to finish.
threads = []
for i in range(thread_count):
    t = Thread(target=worker, args=(q,))
    t.start()
    threads.append(t)
# Block until every worker has returned, so that any code running after this
# point sees the workers' results in their final state.
for t in threads:
    t.join()