我正在尝试将一个大型 XML 文件拆分为许多较小的 XML 文件。我使用的是 xml.etree.ElementTree。
我的代码: `
import os, sys
import logging
from shutil import copyfile
import xml.etree.ElementTree as ET
from lxml import etree
import parametres
import dijsplitercoder as DijSpliterCoder
import savetodb
class BigXmlSpliter:
    """Split one big XML invoice file into one small XML file per record.

    Every element whose tag is listed in ``parametres.splitTags`` is written
    to its own ``<work file>_<5-digit sequence>.xml`` file, wrapped in a fixed
    ``<factureMobile>`` header/footer.  The companion ``.jrn`` journal of the
    big file is renamed to ``.dij`` and handed to ``DijSpliterCoder``, which is
    asked to produce a per-record ``.jrn`` file next to each split XML.
    """

    def __init__(self, xmlFileName=""):
        """Remember the file to split and register this splitter's data list.

        Args:
            xmlFileName: Path of the big XML file to split.
        """
        logging.debug('BXMLSpliter : ' + xmlFileName)
        print('BXMLSpliter : ' + xmlFileName + ' ...')
        self._xmlFileName = xmlFileName
        # The very same list object is queued in savetodb and later passed to
        # DijSpliterCoder, so anything appended to it is visible to both.
        self.big_xml_data = []
        savetodb.data_to_insert.append(self.big_xml_data)

    def split(self):
        """Split the big XML file into one file per matching element.

        The source file and its ``.jrn`` journal are first moved into
        ``parametres.workDir`` (the journal becomes ``.dij``), then the XML is
        parsed incrementally and each element whose tag is in
        ``parametres.splitTags`` is streamed to its own output file.

        Returns:
            False when the source file does not exist; None once the split
            has completed.

        Raises:
            SystemExit: with exit code 10 when anything fails while moving,
                parsing or writing the files (the error is logged first).
        """
        if not os.path.isfile(self._xmlFileName):
            logging.error('split - Fichier introuvable ' + self._xmlFileName)
            return False

        sequence = 0
        # NOTE(review): the attribute below is spelled "noNamespaceSchemLocation";
        # the standard XSI attribute is "noNamespaceSchemaLocation".  Left
        # untouched because it changes the emitted files -- confirm with the
        # consumers of the split files before correcting it.
        header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" \
                 "<factureMobile xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:noNamespaceSchemLocation=\"" \
                 "facturesMobileV6.xsd\">\n"
        footer = "</factureMobile>"

        # Bound before the try so the error handler can always build its message,
        # even if computing the destination path is what failed.
        destFileName = None
        try:
            # Move the inputs into the working directory, swapping the file
            # name prefix on the way.
            destFileName = parametres.workDir + '/' + os.path.basename(
                self._xmlFileName.replace(parametres.prefixProfileXML,
                                          parametres.prefixProfileBigXml))
            os.rename(self._xmlFileName, destFileName)
            os.rename(self._xmlFileName[:-4] + '.jrn', destFileName[:-4] + '.dij')
            self._DijSpliterCoder = DijSpliterCoder.DijSpliterCoder(destFileName[:-4] + '.dij',
                                                                    self.big_xml_data)

            with open(destFileName, 'rb') as f:
                for event, elem in ET.iterparse(f, events=("end",)):
                    if elem.tag not in parametres.splitTags:
                        continue

                    part_base = destFileName + "_" + "{0:0=5d}".format(sequence)
                    part_xml = part_base + '.xml'

                    # Stream the element straight into the output file instead
                    # of materialising it with ET.tostring(): the serialized
                    # form of a very large element never has to fit in memory
                    # as one bytes object.  With encoding="utf-8" no XML
                    # declaration is emitted, so the bytes match tostring().
                    with open(part_xml, 'wb') as fxml:
                        fxml.write(bytes(header, 'UTF-8'))
                        ET.ElementTree(elem).write(fxml, encoding="utf-8")
                        fxml.write(bytes(footer, 'UTF-8'))

                    # Only the first <infoClient> child is used to extract the
                    # matching journal entries.
                    for child in elem:
                        if child.tag == 'infoClient':
                            logging.debug('NClin en cours de traitement : ' + child.attrib['custCode'])
                            self._DijSpliterCoder.extract_ncli_to_file(child.attrib['custCode'],
                                                                       part_base + '.jrn')
                            break

                    self._DijSpliterCoder.setXMLTotalPages(part_xml)
                    sequence += 1
                    # Drop the element's children/text now that it is on disk
                    # so the in-memory tree does not keep growing.
                    elem.clear()
        except Exception as e:
            print('Quelque chose s\'est mal passée!!')
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            # Fall back to the source path if the failure happened before the
            # destination path could be computed.
            base_name = destFileName if destFileName is not None else self._xmlFileName
            filename = base_name + "_" + "{0:0=5d}".format(sequence) + '.xml'
            # Logged at error level (like the "file not found" branch) so the
            # failure is visible without debug logging enabled.
            logging.error("Error creating splited file {} . Detail: {}, {}, {}, {},".format(filename, e, exc_type, fname,
                                                                                           exc_tb.tb_lineno))
            sys.exit(10)
` 这段代码可以正常运行,但是当某个元素非常大时,xml.etree.ElementTree 的 tostring 方法会抛出 "I/O operation on closed file"(对已关闭的文件进行 I/O 操作)错误。我该如何修复它?如果这与内存有关,我该如何优化这个过程呢?