这是代码。但是,此代码只能解析4个字符的Arabian。我希望它动态解析。所以,字符数无关紧要。因此,它可以根据现有字符的数量解析1个字符,2个字符或更多字符。
import xml.etree.ElementTree as ET
import os, glob
import csv
from time import time
#read xml path
xml_path = glob.glob('D:\1. Thesis FINISH!!!\*.xml')
#create file declaration for saving the result
file = open("parsing.csv","w")
#file = open("./%s" % ('parsing.csv'), 'w')
#create variable of starting time
t0=time()
#create file header
file.write('wordImage_id'+'|'+'paw1'+'|'+'paw2'+'|' + 'paw3' + '|' + 'paw4' + '|'+'font_size'+'|'+'font_style'+
'|'+'font_name'+'|'+'specs_effect'+'|'+'specs_height'+'|'+'specs_height'
+'|'+'specs_width'+'|'+'specs_encoding'+'|'+'generation_filtering'+
'|'+'generation_renderer'+'|'+'generation_type' + '\n')
for doc in xml_path:
print 'Reading file - ', os.path.basename(doc)
tree = ET.parse(doc)
#tree = ET.parse('D:\1. Thesis FINISH!!!\Image_14_AdvertisingBold_13.xml')
root = tree.getroot()
#get wordimage id
image_id = root.attrib['id']
#get paw 1 and paw 2
paw1 = root[0][0].text
paw2 = root[0][1].text
paw3 = root[0][2].text
paw4 = root[0][3].text
#get properties of font
for font in root.findall('font'):
size = font.get('size')
style = font.get('fontStyle')
name = font.get('name')
#get properties of specs
for specs in root.findall('specs'):
effect = specs.get('effect')
height = specs.get('height')
width = specs.get('width')
encoding = specs.get('encoding')
#get properties for generation
for generation in root.findall('generation'):
filtering = generation.get('filtering')
renderer = generation.get('renderer')
types = generation.get('type')
#save the result in csv
file.write(image_id + '|' + paw1 + '|' + paw2 + '|' + paw3 + '|' + paw4 + '|' + size + '|' +
style + '|' + name + '|' + effect + '|' + height + '|'
+ width + '|' + encoding + '|' + filtering + '|' + renderer + '|' + types + '\n')
#close the file
file.close()
#print time execution
print("process done in %0.3fs." % (time() - t0))