xml.etree.ElementTree 的 tostring 报错:"I/O operation on closed file"(对已关闭文件进行 I/O 操作)

时间:2017-03-13 15:34:56

标签: python xml optimization xml.etree

我正在尝试将一个大型 XML 文件拆分为许多小的 XML 文件。我使用的是 xml.etree.ElementTree。

我的代码: `
import os,sys
import logging
from shutil import copyfile

import xml.etree.ElementTree as ET
from lxml import etree
import parametres
import dijsplitercoder as DijSpliterCoder
import savetodb

class BigXmlSpliter:
    """Split a big XML file into one small XML file per split element.

    The source file is streamed with ``ET.iterparse``; every element whose tag
    is listed in ``parametres.splitTags`` is written to its own file, wrapped
    in a fixed ``factureMobile`` header and footer.
    """

    def __init__(self, xmlFileName=""):
        """Store the file to split and register a result list with savetodb.

        Args:
            xmlFileName: Path of the big XML file to split.
        """
        logging.debug('BXMLSpliter : ' + xmlFileName)
        print('BXMLSpliter : ' + xmlFileName + ' ...')
        self._xmlFileName = xmlFileName
        self.big_xml_data = []
        # The same list object is shared with savetodb and with the
        # DijSpliterCoder created in split().
        savetodb.data_to_insert.append(self.big_xml_data)

    def split(self):
        """Split the big XML file into numbered sub-files.

        The source file is renamed into ``parametres.workDir`` (with
        ``parametres.prefixProfileXML`` replaced by
        ``parametres.prefixProfileBigXml`` in its name) and its ``.jrn``
        companion is renamed next to it with a ``.dij`` extension. Each
        element whose tag is in ``parametres.splitTags`` is then written to
        ``<dest>_<5-digit sequence>.xml``.

        Returns:
            False when the source file does not exist, None otherwise.

        Raises:
            SystemExit: With exit code 10 when any ``Exception`` occurs while
                moving or splitting the files.
        """
        if not os.path.isfile(self._xmlFileName):
            logging.error('split - Fichier introuvable ' + self._xmlFileName)
            return False

        sequence = 0
        # NOTE(review): the standard XSI attribute is spelled
        # "noNamespaceSchemaLocation"; the spelling below is kept unchanged
        # because it is part of the generated output - confirm with consumers.
        header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" \
                 "<factureMobile xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:noNamespaceSchemLocation=\"" \
                 "facturesMobileV6.xsd\">\n"
        footer = "</factureMobile>"
        header_bytes = bytes(header, 'UTF-8')
        footer_bytes = bytes(footer, 'UTF-8')

        # Bound before the try block so the error handler below can always
        # build its message, even if computing the destination name fails.
        destFileName = self._xmlFileName

        try:
            # Move to the working directory, switching the file-name prefix.
            destFileName = parametres.workDir + '/' + os.path.basename(self._xmlFileName.replace(
                parametres.prefixProfileXML, parametres.prefixProfileBigXml))
            os.rename(self._xmlFileName, destFileName)
            # [:-4] strips the extension; assumes a 4-character suffix such
            # as ".xml" - TODO confirm.
            os.rename(self._xmlFileName[:-4] + '.jrn', destFileName[:-4] + '.dij')

            self._DijSpliterCoder = DijSpliterCoder.DijSpliterCoder(destFileName[:-4] + '.dij', self.big_xml_data)

            with open(destFileName, 'rb') as f:
                context = ET.iterparse(f, events=("end",))
                for event, elem in context:
                    if elem.tag not in parametres.splitTags:
                        continue

                    # Common stem of the .xml and .jrn files of this part.
                    part_base = destFileName + "_" + "{0:0=5d}".format(sequence)
                    part_xml = part_base + '.xml'

                    # "with" guarantees the part file is closed even when
                    # serialising or writing the element raises.
                    with open(part_xml, 'wb') as fxml:
                        fxml.write(header_bytes)
                        fxml.write(ET.tostring(elem, encoding="utf-8"))
                        fxml.write(footer_bytes)

                    # Only the first infoClient child is used.
                    for child in elem:
                        if child.tag == 'infoClient':
                            logging.debug('NClin en cours de traitement : ' + child.attrib['custCode'])
                            self._DijSpliterCoder.extract_ncli_to_file(child.attrib['custCode'],
                                                                       part_base + '.jrn')
                            break

                    self._DijSpliterCoder.setXMLTotalPages(part_xml)

                    sequence += 1
                    # Drop the element's children, text and attributes now
                    # that it has been written out.
                    elem.clear()

                del context

        except Exception as e:
            print('Quelque chose s\'est mal passée!!')
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            filename = destFileName + "_" + "{0:0=5d}".format(sequence) + '.xml'
            # Logged at ERROR level (like the missing-file case above) so the
            # reason for the exit is visible with a default logging setup.
            logging.error("Error creating splited file {} . Detail: {}, {}, {}, {},".format(filename, e, exc_type, fname,
                                                                                            exc_tb.tb_lineno))
            sys.exit(10)

` 这段代码可以运行,但当遇到非常大的元素时,xml.etree.ElementTree 的 tostring 方法会抛出 "I/O operation on closed file"(对已关闭文件进行 I/O 操作)错误。我该如何修复这个问题?如果它与内存有关,我又该如何优化这个处理过程?

0 个答案:

没有答案