我最近写了一个 Python 脚本,用来解析与给定模式匹配的多个 XML 文件,并将结果写入 .csv 文件(参见下面的代码)。 我想将下面的代码拆分为类和函数,如下所示: 类: 函数1 -> 将文件路径加载到列表中; 函数2 -> 将结果写入 .csv 文件
有什么建议吗?非常感谢您的帮助。
#
# Import Python modules
#
import csv
import datetime
import fnmatch
import os
import sys
import xml.etree.ElementTree as ET
#
# Define rootpath and pattern
#
rootpath = '/Users/USER_ONE/XML_Test_Data'
pattern = 'data-*'


def find_matching_files(top, name_pattern):
    """Return the paths of all files under *top* whose name matches *name_pattern*.

    The directory tree is walked recursively with ``os.walk`` and the file
    names of each directory are matched with ``fnmatch.filter``, so
    *name_pattern* is a shell-style wildcard (for example ``'data-*'``),
    not a regular expression.

    Args:
        top: Directory at which the recursive search starts.
        name_pattern: Shell-style wildcard applied to each file name.

    Returns:
        A list of ``os.path.join(dirpath, filename)`` strings in the order
        ``os.walk`` produced them. The list is empty when nothing matches or
        when *top* does not exist (``os.walk`` ignores listing errors by
        default).
    """
    matches = []
    for dirpath, _dirnames, filenames in os.walk(top):
        matches.extend(
            os.path.join(dirpath, filename)
            for filename in fnmatch.filter(filenames, name_pattern)
        )
    return matches


#
# Search for files in directory and add the selection into list.
# The module-level name ``List`` is kept because later parts of the script
# read it.
#
List = find_matching_files(rootpath, pattern)
# Banner announcing the list of matched files.
print('')
print('=====================================')
print('List of files matching the criteria')
print('=====================================')
print('')
#
# Print files matching the criteria one by one
#
for item in List:
    print(item)
# len() replaces the hand-maintained counter; ``count`` stays defined at
# module level with the same value the loop used to accumulate.
count = len(List)
print('')
print(str(count) + ' files retrieved')
#
# Banner announcing the extraction step that follows.
print('')
print('=====================================')
print('System now extracts data from XML files above into .CSV file')
print('=====================================')
##
## Get current Time
# Column titles written as the first row of the output file.
CSV_FIELDS = [
    'UID', 'TITLE', 'ACTOR', 'DIRECTOR', 'COUNTRY',
    'FORMAT', 'BITRATE', 'FPS', 'ASPECT RATIO', 'PATH',
]


def _element_text(element):
    """Return ``element.text``, or ``''`` when the element has no text."""
    # ElementTree reports an empty element's text as None, which cannot be
    # joined with other strings.
    return element.text or ''


def _ascii_only(value):
    """Return *value* with every non-ASCII character dropped."""
    return value.encode('ascii', 'ignore').decode('ascii')


def extract_record(xml_path):
    """Parse one XML file and return its values as a list of ten strings.

    Values are read by position, not by tag name: the root element's ``uid``
    attribute, the text of the root's first four children (written under
    TITLE, ACTOR, DIRECTOR and COUNTRY), and the text of the first four
    children of the root's fifth child (written under FORMAT, BITRATE, FPS
    and ASPECT RATIO). The last value is
    ``xml_path + '-' + uid + '-' + format``.

    Args:
        xml_path: Path of the XML file to parse.

    Returns:
        The ten values in ``CSV_FIELDS`` order. A missing ``uid`` attribute
        or an element without text yields ``''``.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        IndexError: If the document has fewer child elements than are read.
    """
    root = ET.parse(xml_path).getroot()
    uid = root.get('uid', '')
    title, actor, director, country = [_element_text(root[i]) for i in range(4)]
    # Elements inside the fifth child of the root (the ASSET node).
    asset = root[4]
    media_format, bitrate, fps, aspect_ratio = [
        _element_text(asset[i]) for i in range(4)
    ]
    # Path = merge of file path + uid + format.
    path_field = xml_path + '-' + uid + '-' + media_format
    return [
        uid, title, actor, director, country,
        media_format, bitrate, fps, aspect_ratio, path_field,
    ]


def write_csv(xml_paths, csv_path):
    """Append a header row and one row per XML file to *csv_path*.

    Rows are ``;``-separated and end with ``'\n'``. Non-ASCII characters are
    removed from every value. Values that contain ``;``, a quote character or
    a line break are quoted by ``csv.writer`` instead of corrupting the row.

    Args:
        xml_paths: Iterable of XML file paths, processed in order.
        csv_path: Output file; it is opened in append mode and is closed even
            when parsing one of the XML files raises.
    """
    # newline='' leaves line-ending handling to the csv writer.
    with open(csv_path, 'a', newline='') as out:
        writer = csv.writer(out, delimiter=';', lineterminator='\n')
        writer.writerow(CSV_FIELDS)
        for xml_path in xml_paths:
            writer.writerow(
                [_ascii_only(value) for value in extract_record(xml_path)]
            )


## Get current time; it is embedded in the output file name.
# NOTE(review): str(datetime.now()) contains a space and ':' characters, which
# Windows does not accept in file names -- confirm the script only runs on
# POSIX systems before relying on this name.
curr_time = str(datetime.datetime.now())
##
## The output file gets created in the current working directory.
outfile = "XML-Output-"+curr_time+".CSV"
write_csv(List, outfile)
print('')
print('*** Data extracted successfully! ***')
print('*** Check results in: '+ outfile + ' ***')