Question

我有一个处理数据文件的python脚本：

# Excerpt from the asker's script: writes a CSV header to the output file,
# then copies every input line that has exactly six comma-separated fields.
# `name` is not defined in this excerpt; it comes from the full script.
out = open('result/process/'+name+'.res','w')
out.write("source,rssi,lqi,packetId,run,counter\n")
f = open('result/resultat0.res','r')
# NOTE(review): lines obtained by iterating a file keep their trailing '\n',
# so the `x != ''` filter presumably never excludes anything -- confirm.
for ligne in [x for x in f if x != '']:
    # Drop the line terminator before splitting into fields.
    chaine = ligne.rstrip('\n')
    tmp = chaine.split(',')
    # Keep only records with exactly six fields.
    if (len(tmp) == 6 ):
        out.write(','.join(tmp)+"\n")
f.close()
# NOTE(review): `out` is not closed anywhere in this excerpt -- confirm that
# the full script closes (or flushes) it.

完整代码见原帖中的链接。

我在几台计算机上使用此脚本，但行为并不一致。在第一台计算机上（Python 2.6.6），结果符合我的预期。但是，在其他计算机上（Python 2.6.6、3.3.2、2.7.5），文件对象的 write 方法在处理过程的大部分时间里写入的是空字节，而不是我想要的值。我得到了如下结果：

$ hexdump -C result/process/1.res
00000000  73 6f 75 72 63 65 2c 72  73 73 69 2c 6c 71 69 2c  |source,rssi,lqi,|
00000010  70 61 63 6b 65 74 49 64  2c 72 75 6e 2c 63 6f 75  |packetId,run,cou|
00000020  6e 74 65 72 0a 00 00 00  00 00 00 00 00 00 00 00  |nter............|
00000030  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
*
0003a130  00 00 00 00 00 00 00 00  00 00 31 33 2c 36 35 2c  |..........13,65,|
0003a140  31 34 2c 38 2c 39 38 2c  31 33 31 34 32 0a 31 32  |14,8,98,13142.12|
0003a150  2c 34 37 2c 31 37 2c 38  2c 39 38 2c 31 33 31 34  |,47,17,8,98,1314|
0003a160  33 0a 33 2c 34 35 2c 31  38 2c 38 2c 39 38 2c 31  |3.3,45,18,8,98,1|
0003a170  33 31 34 34 0a 31 31 2c  38 2c 32 33 2c 38 2c 39  |3144.11,8,23,8,9|
0003a180  38 2c 31 33 31 34 35 0a  39 2c 32 30 2c 32 32 2c  |8,13145.9,20,22,|

请问您如何解决此问题？

Answer 1

考虑以下因素：

在十多年的 Python 编程经历中，我从未遇到过必须使用 global 的充分理由。请改为把参数传递给函数。
为确保文件在使用完毕后被关闭，请使用 with 语句。

下面是一次（未经测试的）重构尝试，目的是让您的代码更清晰、更易于推理；这里假设您有足够的可用内存来保存某个特定标识符下的所有行。

如果重构之后结果文件中仍然出现空字节，那么我们就有了继续调试的合理基础。

import os
import re
from contextlib import closing

def list_files_to_process(directory='results'):
  """
  Collect the paths of the regular files located directly inside directory
  whose names end in '.res' (the comparison ignores case).

  Each returned path is directory joined with the entry name; entries are
  visited in the order os.listdir() reports them.
  """
  return [
    os.path.join(directory, entry)
    for entry in os.listdir(directory)
    if entry.lower().endswith('.res')
    and os.path.isfile(os.path.join(directory, entry))
  ]

def group_lines(sequence):
  """
  Generator: split a sequence of lines into batches delimited by separator
  lines of the form ``A:<digits>:``.

  Trailing line terminators are removed from every line first, so the
  function accepts an open text file as well as a list of plain strings.
  Lines that are empty after that are skipped.

  Yields (batch_id, lines) tuples. batch_id is the digit string taken from
  the separator that opened the batch, or None for lines that appear before
  the first separator. lines is the list of terminator-free lines in the
  batch; it is empty when a separator is directly followed by another
  separator or by the end of the input.
  """
  # Raw string so the regex escape is not parsed as a string escape.
  separator = re.compile(r'^A:(?P<id>\d+):$')
  batch = []
  batch_id = None
  for line in sequence:
    # Lines read from a file keep their newline; strip it so blank lines are
    # really empty and callers can inspect the last character of a record.
    line = line.rstrip('\r\n')
    if not line: # Ignore blanks
      continue
    m = separator.match(line)
    if m is None:
      batch.append(line)
      continue
    # A new separator closes the batch collected so far (if there is one).
    if batch_id is not None or batch:
      yield (batch_id, batch)
    batch_id = m.group('id')
    batch = []
  # Flush the final batch, which has no closing separator.
  if batch_id is not None or batch:
    yield (batch_id, batch)

def filename_for_results(batch_id,result_directory):
  """
  Build the path of the result file for batch_id: a file named
  'results-<batch_id>.res' placed inside result_directory.
  """
  basename = "results-%s.res" % (batch_id,)
  return os.path.join(result_directory, basename)

def open_result_file(filename,header="source,rssi,lqi,packetId,run,counter"):
  """
  Open filename for appending and return the file object.

  When the file is missing or has a size of zero, the header line (followed
  by a newline) is written before the file object is returned. The caller is
  responsible for closing the returned file.
  """
  # Decide before opening: opening in append mode creates a missing file.
  has_content = os.path.exists(filename) and os.path.getsize(filename) > 0
  result_file = open(filename,'a')
  if not has_content:
    result_file.write(header + '\n')
  return result_file

def process_file(filename,result_directory='results/processed'):
  """
  Open filename and process its contents. Uses group_lines() to group
  lines into batches, based upon specific lines acting as content
  separators, and appends each batch to its own result file.

  A record is accepted when, without its line terminator, it starts with
  'L', ends with 'E' and contains exactly five commas. The leading 'L' and
  the trailing 'E' are removed and the remainder is written as one line to
  the result file named by filename_for_results(batch_id, result_directory).

  Batches without lines and records that do not match are reported, one per
  line, in the error file filename_for_results('error', result_directory).
  """
  error_filename = filename_for_results('error',result_directory)
  # The error file is opened with 'w', so it is truncated on every call.
  with open(filename,'r') as in_file, open(error_filename,'w') as error_out:
    for batch_id, lines in group_lines(in_file):
      if not lines:
        # Terminate the message so consecutive reports do not run together.
        error_out.write("Received batch %r with 0 lines\n" % (batch_id,))
        continue
      out_filename = filename_for_results(batch_id,result_directory)
      with closing(open_result_file(out_filename)) as out_file:
        for line in lines:
          # Tolerate lines that still carry their terminator; otherwise the
          # endswith('E') test below could never succeed for them.
          record = line.rstrip('\r\n')
          if record.startswith('L') and record.endswith('E') and record.count(',') == 5:
            # Slice off exactly one marker on each side; lstrip/rstrip would
            # also eat payload characters that happen to be 'L' or 'E'.
            out_file.write(record[1:-1] + '\n')
          else:
            error_out.write("Unknown line, batch=%r: %r\n" %(batch_id,line))

if __name__ == '__main__':
  # Process every file returned by list_files_to_process() with its default
  # directory argument.
  files = list_files_to_process()
  for filename in files:
    # The parenthesised form is valid both as a Python 2 print statement and
    # as a call to the Python 3 print() function.
    print("Processing %s" % (filename,))
    process_file(filename)

在文件中写入空字节而不是正确的字符串

1 个答案: