我试图解析一个如下所示的.txt文件:
-------------------------------------------------------------------------------
Compare Results
Compare Directory 1 : /data/Run_288/bitmaps
Compare Directory 2 : /data/Run_301/bitmaps
-------------------------------------------------------------------------------
idx, Filename , Exact, F3x3, F5x5, F7x7, Threshold, P/F
-------------------------------------------------------------------------------
1, ASCII_APPE_600X450_150_colorManBasic2.blackGrayReproductionImage_0_2p_color_test_four_object.pdf_20190522005734_00001.tif, 0, 0, 0, 0, 0, PASS
2, ASCII_APPE_600X450_150_colorManBasic2.blackGrayReproductionImage_0_2p_color_test_four_object.pdf_20190522005734_00002.tif, 0, 0, 0, 0, 0, PASS
-------------------------------------------------------------------------------
Bitmap Compare FAILURE !!! Threshold Exceeded : Threshold Values : Exact = 0 : Fuzzy 3x3 = 200 : Fuzzy 5x5 = 100 : Fuzzy 7x7 = 50 : Threshold 7x7 = 0
3, MIME_Test3_Job_setup__600X600_50_default_default_PPST56_003.mjm_20190521213826_00001.tif, 2083, 1180, 650, 262, 52, FAIL
-------------------------------------------------------------------------------
我需要根据它生成如下格式的 XML:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<Suite date="2019-05-27T10:47:03" compareDir1="/data/Run_288/bitmaps" compareDir2="/data/Run_301/bitmaps">
<Test name="ASCII_APPE_600X450_150_colorManBasic2.blackGrayReproductionImage_0_2p_color_test_four_object.pdf_20190522005734_00001.tif" result="pass">
</Test>
<Test name="ASCII_APPE_600X450_150_colorManBasic2.blackGrayReproductionImage_0_2p_color_test_four_object.pdf_20190522005734_00002.tif" result="pass">
</Test>
<Test name="MIME_Test3_Job_setup__600X600_50_default_default_PPST56_003.mjm_20190521213826_00001.tif" result="crash">
</Test>
</Suite>
下面是我写的代码,按理说应该可以工作,但实际上运行不起来。我的 Python 知识有限,不明白问题出在哪里,有人可以帮我吗?谢谢!
import datetime
import xml.dom.minidom as minidom
import xml.etree.ElementTree as ET

# Verdict column of the report -> value of the "result" attribute in the XML.
RESULT_NAMES = {'PASS': 'pass', 'FAIL': 'crash'}


def build_suite(lines, date=None):
    """Build the <Suite> element from the lines of a bitmap compare report.

    Args:
        lines: Iterable of report lines (without trailing newlines).
        date: Optional value for the ``date`` attribute of <Suite>; the
            attribute is omitted when this is None.

    Returns:
        An ``xml.etree.ElementTree.Element`` named ``Suite`` with one
        ``compareDirN`` attribute per "Compare Directory N : path" line and
        one <Test name=... result=...> child per result row, in file order.
    """
    root = ET.Element('Suite')
    if date is not None:
        root.set('date', date)

    for raw in lines:
        line = raw.strip()

        if line.startswith('Compare Directory'):
            # "Compare Directory 1 : /path" -> compareDir1="/path"
            label, sep, path = line.partition(':')
            if sep:
                root.set('compareDir' + label.split()[-1], path.strip())
            continue

        # Result rows look like "idx, filename, n, n, n, n, n, PASS|FAIL".
        # Separator, header and "Bitmap Compare FAILURE" lines do not start
        # with a numeric index and are skipped here.
        idx, _, rest = line.partition(',')
        if not idx.strip().isdigit():
            continue

        # Split from the right so a comma inside the filename cannot shift
        # the six trailing columns.
        fields = [field.strip() for field in rest.rsplit(',', 6)]
        if len(fields) != 7:
            continue

        verdict = fields[-1]
        ET.SubElement(
            root,
            'Test',
            name=fields[0],
            result=RESULT_NAMES.get(verdict, verdict.lower()),
        )

    return root


def main(in_path='file3.txt', out_path='results.xml'):
    """Read the report at *in_path* and write the pretty-printed XML to *out_path*."""
    with open(in_path) as f:
        lines = f.read().splitlines()

    # NOTE(review): the desired output shows a naive ISO timestamp; the
    # current local time is used here -- confirm this is the intended date.
    stamp = datetime.datetime.now().replace(microsecond=0).isoformat()
    root = build_suite(lines, date=stamp)

    # toprettyxml(encoding=...) returns bytes, so decode for display and
    # write the file in binary mode.
    formatted_xml = minidom.parseString(
        ET.tostring(root)).toprettyxml(indent=" ", encoding='utf-8').strip()

    # Display for debugging
    print(formatted_xml.decode('utf-8'))

    with open(out_path, "wb") as f:
        f.write(formatted_xml)


if __name__ == '__main__':
    main()
答案 0 :(得分:1)
对于这个问题,我会使用正则表达式。我的做法如下:
import re
from xml.sax.saxutils import quoteattr

# Sample compare report, embedded so the snippet runs on its own.
data = '''-------------------------------------------------------------------------------
Compare Results
Compare Directory 1 : /data/Run_288/bitmaps
Compare Directory 2 : /data/Run_301/bitmaps
-------------------------------------------------------------------------------
idx, Filename , Exact, F3x3, F5x5, F7x7, Threshold, P/F
-------------------------------------------------------------------------------
1, ASCII_APPE_600X450_150_colorManBasic2.blackGrayReproductionImage_0_2p_color_test_four_object.pdf_20190522005734_00001.tif, 0, 0, 0, 0, 0, PASS
2, ASCII_APPE_600X450_150_colorManBasic2.blackGrayReproductionImage_0_2p_color_test_four_object.pdf_20190522005734_00002.tif, 0, 0, 0, 0, 0, PASS
-------------------------------------------------------------------------------
Bitmap Compare FAILURE !!! Threshold Exceeded : Threshold Values : Exact = 0 : Fuzzy 3x3 = 200 : Fuzzy 5x5 = 100 : Fuzzy 7x7 = 50 : Threshold 7x7 = 0
3, MIME_Test3_Job_setup__600X600_50_default_default_PPST56_003.mjm_20190521213826_00001.tif, 2083, 1180, 650, 262, 52, FAIL
-------------------------------------------------------------------------------'''

# Raw strings are required here: "\s" and "\d" in a plain literal are
# invalid escape sequences.
# "Compare Directory <n> : <path>" -> (n, path)
DIR_RE = re.compile(
    r'^Compare Directory[ \t]+(\d+)[ \t]*:[ \t]*(.*?)[ \t]*$', re.MULTILINE)
# "<idx>, <filename>, ..., PASS|FAIL" -> (filename, verdict)
ROW_RE = re.compile(
    r'^[ \t]*\d+,[ \t]+(.*?),.*?(PASS|FAIL)[ \t]*$', re.MULTILINE)

# Verdict in the report -> value of the "result" attribute.
RESULTS = {'PASS': 'pass', 'FAIL': 'crash'}

# Keyed by directory number, so a missing line yields an empty attribute
# instead of an IndexError.
dirs = dict(DIR_RE.findall(data))

# The date is the fixed sample value from the requested output.
parts = [
    '<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n',
    f'<Suite date="2019-05-27T10:47:03" '
    f'compareDir1={quoteattr(dirs.get("1", ""))} '
    f'compareDir2={quoteattr(dirs.get("2", ""))}>\n',
]

# A single pass over the rows keeps the tests in file order; quoteattr
# escapes any XML-special characters in the file name.
for name, verdict in ROW_RE.findall(data):
    parts.append(
        f' <Test name={quoteattr(name)} result="{RESULTS[verdict]}">\n'
        '</Test>\n'
    )

parts.append('</Suite>')
s = ''.join(parts)
print(s)
打印:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<Suite date="2019-05-27T10:47:03" compareDir1="/data/Run_288/bitmaps" compareDir2="/data/Run_301/bitmaps">
<Test name="ASCII_APPE_600X450_150_colorManBasic2.blackGrayReproductionImage_0_2p_color_test_four_object.pdf_20190522005734_00001.tif" result="pass">
</Test>
<Test name="ASCII_APPE_600X450_150_colorManBasic2.blackGrayReproductionImage_0_2p_color_test_four_object.pdf_20190522005734_00002.tif" result="pass">
</Test>
<Test name="MIME_Test3_Job_setup__600X600_50_default_default_PPST56_003.mjm_20190521213826_00001.tif" result="crash">
</Test>
</Suite>