我正在尝试使用Heapq Merge合并一堆填充了排序整数的临时文件并写入输出文件。函数中的生成器返回值。 heapq.merge()看起来很好。程序构建但没有任何内容写入TestWriteOutput.txt文件。我尝试在打开文件的行之后写入输出文件(outf),但没有任何内容写入。 TestWriteOutput.txt已创建,但为空。
import os
import sys
import array
import tempfile
import heapq
cwd = os.getcwd()
print "Current Directory: %s" % cwd
#generator function to return one integer at a time from the tempfile
to a list
def numInFile(f):
while True:
#set input buffer to read 8KB
input_buffer = [int(x) for x in f.read(1024*2).split(',') if
x.strip().isdigit()]
#convert list of string values into integers
int_buffer = map(int, input_buffer)
f.seek(0) #reset flag
if not input_buffer:
break
for number in int_buffer:
yield number #returns one number at a time and appends to
iterator
with open('age.txt', 'r+') as inf:
with open('TestWriteOutput.txt', 'w+') as outf:
outf.write('some test data')
outf.write('some more data')
#iterator for heapq merge
sorted_list =[]
while True:
a = [int(x) for x in inf.read(20000).split(',') if
x.strip().isdigit()]
int_a = map(int, a)
if not a:
break
f = tempfile.TemporaryFile()
#sort and write to temp file
outf_array = sorted(int_a)
#####print outf_array
f.write(str(outf_array))
f.seek(0)
sorted_list.append(numInFile(f))
write_to_file = array.array('i')
#heapq merge function merges multiple ordered lists into a
single list
for x in heapq.merge(*sorted_list):
out_buffer = 1024*4
write_to_file.append(x)
if len(write_to_file) >= out_buffer:
write_to_file.tofile(outf)
del write_to_file[:]
if write_to_file:
write_to_file.tofile(outf)
答案 0 :(得分:0)
问题是在numInFile
函数中,您将在每次迭代中将文件指针重置回文件的开头。这使numInFile
成为取之不尽的发电机。
如果我将numInFile
更改为:
def numInFile(f):
while True:
#set input buffer to read 8KB
input_buffer = [int(x) for x in f.read(1024*2).split(',') if
x.strip().isdigit()]
#convert list of string values into integers
int_buffer = map(int, input_buffer)
if not input_buffer:
break
for number in int_buffer:
yield number #returns one number at a time and appends to iterator
并删除对输出文件的测试写入,程序成功完成。
>>> import array
>>> with open('TestWriteOutput.txt') as f:
... arr = array.array('i')
... arr.fromfile(f, 64)
...
>>> arr
array('i', [3, 3, 4, 5, 6, 6, 8, 8, 8, 8, 10, 11, 12, 12, 13, 17, 21, 25, 29, 30, 36, 37, 38, 39, 40, 44, 44, 46, 50, 50, 50, 52, 53, 53, 55, 56, 57, 59, 62, 63, 63, 64, 64, 65, 65, 66, 67, 68, 69, 70, 72, 73, 73, 74, 75, 75, 75, 75, 75, 76, 76, 77, 78, 79])