我有一个处理数据文件的python脚本:
# Copy every well-formed record (exactly six comma-separated fields) from the
# raw dump into the per-name result file, preceded by the CSV header.
# `name` is expected to be defined by the surrounding script.
# The `with` blocks guarantee both files are flushed and closed even if an
# error occurs part-way through; the original never closed `out`.
with open('result/process/' + name + '.res', 'w') as out:
    out.write("source,rssi,lqi,packetId,run,counter\n")
    with open('result/resultat0.res', 'r') as f:
        # Iterate the file lazily. Lines read from a file always carry their
        # terminator, so comparing them with '' never filtered anything;
        # blank lines are rejected by the field-count test below instead.
        for ligne in f:
            chaine = ligne.rstrip('\n')
            tmp = chaine.split(',')
            if len(tmp) == 6:
                out.write(','.join(tmp) + "\n")
完整代码见原帖中的链接。
我在几台计算机上使用此脚本,但行为并不一致。在第一台计算机上(python 2.6.6),结果符合我的预期。但是,在其他计算机上(python 2.6.6、3.3.2、2.7.5),文件对象的 write 方法在大部分处理过程中写入的是空字节,而不是我想要的值。我得到了这样的结果:
$ hexdump -C result/process/1.res
00000000 73 6f 75 72 63 65 2c 72 73 73 69 2c 6c 71 69 2c |source,rssi,lqi,|
00000010 70 61 63 6b 65 74 49 64 2c 72 75 6e 2c 63 6f 75 |packetId,run,cou|
00000020 6e 74 65 72 0a 00 00 00 00 00 00 00 00 00 00 00 |nter............|
00000030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
*
0003a130 00 00 00 00 00 00 00 00 00 00 31 33 2c 36 35 2c |..........13,65,|
0003a140 31 34 2c 38 2c 39 38 2c 31 33 31 34 32 0a 31 32 |14,8,98,13142.12|
0003a150 2c 34 37 2c 31 37 2c 38 2c 39 38 2c 31 33 31 34 |,47,17,8,98,1314|
0003a160 33 0a 33 2c 34 35 2c 31 38 2c 38 2c 39 38 2c 31 |3.3,45,18,8,98,1|
0003a170 33 31 34 34 0a 31 31 2c 38 2c 32 33 2c 38 2c 39 |3144.11,8,23,8,9|
0003a180 38 2c 31 33 31 34 35 0a 39 2c 32 30 2c 32 32 2c |8,13145.9,20,22,|
请问您如何解决此问题?
答案 0 :(得分:2)
请考虑以下几点:
没有使用 `global` 的令人信服的理由;请改为将参数传递给函数。
下面是对您的代码进行重构的一次(未经测试的)尝试,目的是让代码更清晰、更易于推理;它假设您有足够的可用内存来保存某个特定标识符下的所有行。
如果在重构之后结果文件中有空字节,那么我们有合理的基础来继续调试。
import os
import re
from contextlib import closing
def list_files_to_process(directory='results'):
    """
    Return the paths of the '.res' files found directly inside directory.

    The extension check is case insensitive and only regular files are
    returned (sub-directories are skipped).  Each returned path is directory
    joined with the file name.  The names are sorted before filtering so the
    files come back in the same order on every machine; os.listdir() on its
    own returns entries in arbitrary order.
    """
    results = []
    for filename in sorted(os.listdir(directory)):
        filepath = os.path.join(directory, filename)
        if os.path.isfile(filepath) and filename.lower().endswith('.res'):
            results.append(filepath)
    return results
def group_lines(sequence):
    """
    Generator that splits a sequence of lines into batches.

    A separator line has the form 'A:<digits>:'.  Each separator starts a new
    batch whose id is the digit string; the lines that follow it, up to the
    next separator, belong to that batch.  Lines seen before the first
    separator are yielded under the id None.

    Trailing line terminators are removed from every line before it is
    examined, so the sequence may be an open file object.  Lines that are
    empty after that are ignored.

    Yields (batch_id, lines) tuples.  A separator immediately followed by
    another separator yields its id with an empty list.
    """
    separator = re.compile(r'^A:(?P<id>\d+):$')
    batch = []
    batch_id = None
    for line in sequence:
        # Lines read from a file keep their terminator; drop it so that the
        # blank-line test works and consumers receive clean records.
        line = line.rstrip('\r\n')
        if not line:  # Ignore blanks
            continue
        m = separator.match(line)
        if m is not None:
            # Flush the batch collected so far before starting the next one.
            if batch_id is not None or batch:
                yield (batch_id, batch)
            batch_id = m.group('id')
            batch = []
        else:
            batch.append(line)
    # Flush whatever is left once the input is exhausted.
    if batch_id is not None or batch:
        yield (batch_id, batch)
def filename_for_results(batch_id, result_directory):
    """
    Return the path of the result file for batch_id under result_directory.

    The file name has the form 'results-<batch_id>.res'; batch_id is
    formatted with %s, so any value with a string form is accepted.
    """
    return os.path.join(result_directory, "results-%s.res" % (batch_id,))
def open_result_file(filename, header="source,rssi,lqi,packetId,run,counter"):
    """
    Return filename opened in append mode, writing header first if needed.

    The header (followed by a newline) is written only when the file is
    empty, which includes the case where it has just been created.  The
    caller is responsible for closing the returned file object.
    """
    f = open(filename, 'a')
    try:
        # Ask the open descriptor for its size instead of checking the path
        # before opening: this avoids a check-then-open race and works the
        # same whether the file already existed or was just created.
        if os.fstat(f.fileno()).st_size == 0:
            f.write(header + '\n')
    except Exception:
        # Do not leak the handle if the header cannot be written.
        f.close()
        raise
    return f
def process_file(filename, result_directory='results/processed'):
    """
    Open filename and process its contents.

    group_lines() splits the input into batches based on a specific line
    acting as a content separator.  Each batch is appended to its own result
    file (see filename_for_results() and open_result_file()).  A record is
    accepted when it starts with 'L', ends with 'E' and contains exactly five
    commas; the two marker characters are removed before it is written.

    Empty batches and lines that do not match are reported in the 'error'
    result file, which is opened in write mode and therefore rewritten on
    every call.
    """
    error_filename = filename_for_results('error', result_directory)
    with open(filename, 'r') as in_file, open(error_filename, 'w') as error_out:
        for batch_id, lines in group_lines(in_file):
            if not lines:
                # Terminate the message so consecutive reports do not run
                # together on one line.
                error_out.write("Received batch %r with 0 lines\n" % (batch_id,))
                continue
            out_filename = filename_for_results(batch_id, result_directory)
            with closing(open_result_file(out_filename)) as out_file:
                for line in lines:
                    # Lines may still carry their terminator, which would
                    # defeat the endswith('E') test below.
                    line = line.rstrip('\r\n')
                    if line.startswith('L') and line.endswith('E') and line.count(',') == 5:
                        # Drop exactly one marker character at each end;
                        # lstrip/rstrip would also eat payload characters
                        # that happen to equal the marker.
                        out_file.write(line[1:-1] + '\n')
                    else:
                        error_out.write("Unknown line, batch=%r: %r\n" % (batch_id, line))
if __name__ == '__main__':
    # Script entry point: process every file returned by
    # list_files_to_process() using the default directories.
    files = list_files_to_process()
    for filename in files:
        # print() with a single parenthesised argument behaves the same on
        # Python 2 and Python 3; the bare print statement is a syntax error
        # on Python 3.
        print("Processing %s" % (filename,))
        process_file(filename)