Python XML解析器 - 分解为类和函数

时间:2016-09-11 22:46:43

标签: python xml csv parsing

我最近写了一个 Python 脚本,用来解析文件名与给定模式匹配的多个 XML 文件,并将结果写入 .csv 文件(代码见下文)。我想把下面的代码拆分为类和函数,例如:一个类,其中函数 1 负责将文件路径加载到列表中,函数 2 负责将结果写入 .csv 文件。

有什么建议吗?非常感谢您的帮助。

#
# Import Python modules
#
import csv
import datetime
import fnmatch
import os
import sys
import xml.etree.ElementTree as ET
#
# Define rootpath and pattern
#
rootpath = '/Users/USER_ONE/XML_Test_Data'
pattern = 'data-*'

# Column headers of the generated CSV file, in output order.
FIELD_NAMES = ('UID', 'TITLE', 'ACTOR', 'DIRECTOR', 'COUNTRY', 'FORMAT',
               'BITRATE', 'FPS', 'ASPECT RATIO', 'PATH')


def find_files(top, name_pattern):
    """Return the paths of all files below *top* matching *name_pattern*.

    Args:
        top: Directory that is walked recursively with ``os.walk``.
        name_pattern: ``fnmatch``-style pattern applied to the bare file
            name (not to the full path).

    Returns:
        A list of joined ``directory/filename`` paths in ``os.walk`` order.
        The list is empty when *top* does not exist or nothing matches.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(top)
        for name in fnmatch.filter(names, name_pattern)
    ]


def _to_ascii(value):
    """Return *value* with every non-ASCII character dropped.

    ``None`` (the ``.text`` of an empty XML element) becomes an empty
    string so that callers can concatenate the result safely.
    """
    return (value or '').encode('ascii', 'ignore').decode('ascii')


def extract_row(xml_path):
    """Parse one XML file and return its CSV row as a list of strings.

    The layout is positional: children 0-3 of the root element hold
    title, actor, director and country; the fifth child (the "ASSET"
    node) holds format, bitrate, fps and aspect ratio as its children
    0-3. The UID is read from the root element's ``uid`` attribute and
    defaults to an empty string.

    Args:
        xml_path: Path of the XML file to read.

    Returns:
        Ten ASCII-only strings in ``FIELD_NAMES`` order. The last one is
        ``<xml_path>-<uid>-<format>``.

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed.
        IndexError: If the document has fewer elements than expected.
    """
    root = ET.parse(xml_path).getroot()
    asset = root[4]

    uid = _to_ascii(root.attrib.get('uid', ''))
    media_format = _to_ascii(asset[0].text)

    return [
        uid,
        _to_ascii(root[0].text),   # title
        _to_ascii(root[1].text),   # actor
        _to_ascii(root[2].text),   # director
        _to_ascii(root[3].text),   # country
        media_format,
        _to_ascii(asset[1].text),  # bitrate
        _to_ascii(asset[2].text),  # fps
        _to_ascii(asset[3].text),  # aspect ratio
        _to_ascii(xml_path) + '-' + uid + '-' + media_format,
    ]


def write_csv(xml_paths, csv_path):
    """Append a header line plus one row per XML file to *csv_path*.

    Rows are written through ``csv.writer`` with ``;`` as delimiter, so a
    value that itself contains ``;`` or a quote character is quoted
    instead of silently shifting the columns. The ``with`` block closes
    the file even when one of the XML files fails to parse.

    Args:
        xml_paths: Iterable of XML file paths, processed in order.
        csv_path: Output file; opened in append mode, created if missing.
    """
    with open(csv_path, 'a') as out:
        writer = csv.writer(out, delimiter=';', lineterminator='\n')
        writer.writerow(FIELD_NAMES)
        for xml_path in xml_paths:
            writer.writerow(extract_row(xml_path))


def main():
    """List the matching XML files, then export them to a timestamped CSV."""
    matches = find_files(rootpath, pattern)

    print('')
    print('=====================================')
    print('List of files matching the criteria')
    print('=====================================')
    print('')
    for path in matches:
        print(path)
    print('')
    print(str(len(matches)) +' files retrieved')

    print('')
    print('=====================================')
    print('System now extracts data from XML files above into .CSV file')
    print('=====================================')

    # The file is created in the current working directory.
    # NOTE(review): str(datetime.now()) contains ':' and a space, which is
    # not a valid file name on Windows - confirm the target platforms
    # before changing the naming scheme.
    curr_time = str(datetime.datetime.now())
    outfile = "XML-Output-"+curr_time+".CSV"
    write_csv(matches, outfile)

    print('')
    print('*** Data extracted successfully! ***')
    print('*** Check results in: '+ outfile + ' ***')


if __name__ == '__main__':
    main()

0 个答案:

没有答案