以下代码读取 'out.txt' 的内容,并根据两个文件共有的路径,将其附加到 'fixed_inv.txt' 对应行的末尾,结果写入新文件 'concat.txt'。
在 'concat.txt' 文件中,有相当多的行(成千上万行)似乎在行的中间多出了一个随机的换行符。
例如,一行本应看起来像:
122 abc.def.com Failed to get CIFS shares with error code -2147024891. None Non-supported share access type. 0 Unkonwn NULL bluearc Different Security Type (1), Access is denied. (1354), Pruned. Different security type (21), The inherited access control list (ACL) or access control entry (ACE) could not be built. (3713), Could not convert the name of inner file or directory (27)
但相反,我有一些看起来像:
122 abc.def.com Failed to get CIFS shares with error code -2147024891. None
Non-supported share access type. 0 Unkonwn NULL bluearc Different Security Type (1), Access is denied. (1354), Pruned. Different security type (21), The inherited access control list (ACL) or access control entry (ACE) could not be built. (3713), Could not convert the name of inner file or directory (27)
我已经尝试在下面的代码中解决这个问题,但由于某种原因,代码可以运行却没有解决问题。我想要的是把被断开的后半行接回到上一行的末尾,也就是去掉那个随机的换行符。
class Error:
    """One error record read from the first input file.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, path, message):
        """Store the path and its error message; the record starts unmatched."""
        self.path = path
        self.message = message  # error message associated with the path
        # Set to True once the same path has been found in the second file.
        self.matched = False
def open_files(file1, file2, file3):
    """Open two input files for reading and one output file for writing.

    The files are opened in order (file1, file2, file3). If any of them
    cannot be opened, a message naming that file is printed, every file
    opened so far is closed again, and ``(None, None, None)`` is returned,
    so the caller gets either all three handles or none.

    Args:
        file1: Path of the first input file (opened with mode 'r').
        file2: Path of the second input file (opened with mode 'r').
        file3: Path of the output file (opened with mode 'w').

    Returns:
        A tuple ``(f1, f2, f3)`` of open file objects, or
        ``(None, None, None)`` on failure.
    """
    opened = []
    # The output file comes last so it is only created/truncated once both
    # inputs are known to be readable.
    for name, mode in ((file1, 'r'), (file2, 'r'), (file3, 'w')):
        try:
            opened.append(open(name, mode))
        except IOError:
            print("Can't open {}".format(name))
            for handle in opened:
                handle.close()
            return None, None, None
    return tuple(opened)
def concat(file1, file2, file3):
    """Copy file2 to file3, appending the matching error message from file1.

    file1 is read first. Each of its lines is expected to split on ': '
    into exactly three tokens; the second token is used as the path and the
    third as the error message. Lines with a different token count are
    reported on stdout and ignored.

    file2 is then read line by line. Each line is split on tabs and the
    token at index 3 is taken as the path:
      * fewer than 4 tokens: the problem is reported and the line is copied
        to file3 unchanged;
      * path known from file1: the line is written followed by a tab and
        the error message;
      * path unknown: the line is written followed by a tab and 'None', and
        the mismatch is reported.

    Backslashes are replaced by forward slashes in both inputs before any
    comparison. If any of the three files cannot be opened, nothing is
    processed (open_files prints which one failed).

    Args:
        file1: Path of the ': '-separated error file.
        file2: Path of the tab-separated file to annotate.
        file3: Path of the output file.

    Returns:
        None.
    """
    errors = {}  # key: path, value: Error instance
    f1, f2, f3 = open_files(file1, file2, file3)
    if f1 is None:  # open_files returns all handles or none
        return

    # One with-statement so all three handles are closed even if reading
    # the first file raises.
    with f1, f2, f3:
        for line_num, line in enumerate(f1, start=1):
            line = line.replace("\\", "/")  # normalise path separators
            tokens = line.strip().split(': ')
            if len(tokens) != 3:
                print('Error on line {} in file {}: Expected three tokens, but found {}'.format(line_num, file1, len(tokens)))
                continue
            if line.startswith('Non-supported'):
                # NOTE(review): the original code did not register these
                # lines; it only stored them in a misspelled, never-read
                # variable ("Prevline" vs "prevLine"). That dead bookkeeping
                # is removed; the skip itself is kept. Nothing in this
                # function re-joins records split across physical lines.
                continue
            errors[tokens[1]] = Error(tokens[1], tokens[2])

        for line_num, line in enumerate(f2, start=1):
            # strip() already removes the trailing newline, so no further
            # newline handling is needed before writing.
            line = line.replace("\\", "/").strip()
            tokens_2 = line.split('\t')
            if len(tokens_2) < 4:  # the path is expected at index 3
                print('Error on line {} in file {}: Expected >= 4 tokens, but found {}'.format(line_num, file2, len(tokens_2)))
                f3.write('{}\n'.format(line))
                continue
            path = tokens_2[3]
            if path in errors:
                errors[path].matched = True
                f3.write('{}\t{}\n'.format(line, errors[path].message))
            else:
                # Present in file2 but not in file1.
                f3.write('{}\t{}\n'.format(line, 'None'))
                print('Error on line {} in file {}: Path {} not matched'.format(line_num, file2, path))

    # NOTE: the original ended with a disabled block (wrapped in an
    # unterminated triple-quoted string) that reported paths from file1
    # never matched in file2. It stays disabled; the unterminated string is
    # dropped because it made the rest of the file unparseable.
def main():
    """Run concat() with the hard-coded input and output file names."""
    file1 = 'out.txt'
    file2 = 'fixed_inv.txt'
    file3 = 'test_concat.txt'
    concat(file1, file2, file3)
# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()
任何想法/建议将不胜感激!谢谢。
答案 0 :(得分:0)
在编写之前尝试替换换行符。
**示例**
# Write the line alone: strip() trims surrounding whitespace and replace()
# drops any newline character still inside the text.
f3.write('{}\n'.format(line.strip().replace("\n", "")))
# Same cleanup for the line, and newlines are also removed from the message.
f3.write('{}\t{}\n'.format(line.strip().replace("\n", ""), msg.replace("\n", "")))
# Same cleanup for the line, followed by the literal 'None' placeholder.
f3.write('{}\t{}\n'.format(line.strip().replace("\n", ""), 'None'))
答案 1 :(得分:0)
如果你能在输出端解决这个问题,那么它显然会更容易和更强大。但如果你做不到,你所做的就是朝着正确方向迈出的一步。你只想:
- 用 `prevline + line` 代替 `line`;
- 每得到一条完整的行之后,重置 `prevline = ""`。

此外,您可能希望将此逻辑封装在一个可以重用的生成器函数中。像这样:
def tokenizing(lines, required_tokens=3, sep=': '):
    """Yield ``(line, tokens)`` pairs, re-joining records split across lines.

    Each physical line has its line terminator removed and is appended to
    whatever is pending from earlier lines. The joined text is split on
    ``sep``; once it has exactly ``required_tokens`` tokens it is yielded
    and the pending text is cleared. With too few tokens, the text is kept
    and the next physical line is appended to it.

    Limitation: a record is only recognised as broken if the break leaves
    it with fewer than ``required_tokens`` tokens. A break inside the last
    token yields the truncated record first, and the leftover tail is then
    glued onto the following record.

    Args:
        lines: Iterable of strings, e.g. an open text file.
        required_tokens: Number of tokens a complete record must have.
        sep: Separator passed to ``str.split``.

    Yields:
        ``(line, tokens)`` where ``line`` is the re-joined record without a
        line terminator and ``tokens`` is ``line.split(sep)``.

    Raises:
        ValueError: If a (joined) record has more than ``required_tokens``
            tokens, or if the input ends while a record is incomplete.
    """
    pending = ""
    for raw in lines:
        # Remove only the line terminator, so whitespace that sat at the
        # break point is preserved when the two halves are joined.
        line = pending + raw.rstrip("\r\n")
        tokens = line.split(sep)
        if len(tokens) > required_tokens:
            raise ValueError(
                "expected {} tokens but found {}: {!r}".format(
                    required_tokens, len(tokens), line))
        if len(tokens) == required_tokens:
            yield line, tokens
            pending = ""
        else:
            pending = line
    # Anything still pending has too few tokens by construction, so it can
    # never be completed.
    if pending:
        raise ValueError(
            "incomplete record at end of input: {!r}".format(pending))
然后你可以这样写:
for line, tokens in tokenizing(f1):