我正在尝试使用python和Sax解析器解析xml文件。
问题出在文件中的以下部分:<!DOCTYPE BMECAT SYSTEM "bmecat_new_catalog_1_2.dtd">。
我尝试使用DTDResolver(继承自EntityResolver)来解析并验证该文件,但出现以下错误:bmecat_new_catalog_1_2.dtd:19:0: error in processing external entity reference(处理外部实体引用时出错)
我也试图跳过!DOCTYPE,但是没有用。
删除DOCTYPE不是一个选项。
import csv
import os
import time
import xml.sax
from xml.sax.handler import EntityResolver

import pandas
class micro_bme_handler(xml.sax.ContentHandler):
    """SAX handler that extracts supplier id and reference-feature data.

    A new record is started for every ``ARTICLE`` element. The text of
    ``SUPPLIER_AID``, ``REFERENCE_FEATURE_SYSTEM_NAME`` and
    ``REFERENCE_FEATURE_GROUP_ID`` elements is stored in that record, and a
    snapshot of the record is appended to ``save_box`` each time a
    ``REFERENCE_FEATURE_GROUP_ID`` element closes.

    Attributes:
        CurrentData: name of the element currently being read ("" between
            elements).
        ID, ref_system, ref_system_ID: latest text read for the three
            tracked elements.
        save_box: list of dicts with the keys ``suplier_AID``,
            ``System_reference`` and ``FEATURE_GROUP_ID``.
    """

    # element name -> (handler attribute with the latest text, record key)
    _FIELDS = {
        "SUPPLIER_AID": ("ID", "suplier_AID"),
        "REFERENCE_FEATURE_SYSTEM_NAME": ("ref_system", "System_reference"),
        "REFERENCE_FEATURE_GROUP_ID": ("ref_system_ID", "FEATURE_GROUP_ID"),
    }

    def __init__(self):
        xml.sax.ContentHandler.__init__(self)
        self.CurrentData = ""
        self.ID = ""
        self.ref_system = ''
        self.ref_system_ID = ''
        self._current_product_data = None
        self.save_box = []  # list of dict
        self._text_parts = []  # text chunks of the element being read

    def startElement(self, tag, attributes):
        """Remember the element name and open a new record on ``ARTICLE``."""
        self.CurrentData = tag
        self._text_parts = []
        field = self._FIELDS.get(tag)
        if field is not None:
            # Clear the previous value so an empty element does not inherit
            # the text of the last element with the same name.
            setattr(self, field[0], "")
        if tag == "ARTICLE":
            self._current_product_data = {
                'suplier_AID': None,
                'System_reference': None,
                'FEATURE_GROUP_ID': None,
            }

    def endElement(self, tag):
        """Store the collected text in the current record when an element ends."""
        field = self._FIELDS.get(self.CurrentData)
        record = self._current_product_data
        # A tracked element seen before any ARTICLE has no record to fill.
        if field is not None and record is not None:
            attr_name, key = field
            record[key] = getattr(self, attr_name)
            if key == 'FEATURE_GROUP_ID':
                # Append a copy: the record keeps being updated for further
                # feature groups of the same article, and appending the same
                # object would rewrite rows that were already saved.
                self.save_box.append(dict(record))
        self.CurrentData = ""
        self._text_parts = []

    def characters(self, content):
        """Accumulate character data for the tracked elements.

        The parser may deliver the text of one element in several chunks,
        so the chunks are joined instead of keeping only the last one.
        """
        field = self._FIELDS.get(self.CurrentData)
        if field is None:
            return
        self._text_parts.append(content)
        setattr(self, field[0], "".join(self._text_parts))

    def skippedEntity(self, name):
        """Report an entity the parser skipped."""
        print ('following entity was skipped from parsing: ', name)
class DTDResolver(EntityResolver):
    """Resolve external entities (DTD files) from a local directory.

    The requested ``systemId`` is reduced to its file name and looked up in
    ``dtd_dir``. Returning one fixed DTD path for every request would also
    redirect any further external entity that the DTD itself references to
    that same file.
    NOTE(review): that redirection is a plausible cause of an "error in
    processing external entity reference" raised inside the DTD -- confirm
    against the actual DTD contents.

    Args:
        dtd_dir: directory holding the local DTD files. Defaults to
            ``DEFAULT_DTD_DIR``.
    """

    DEFAULT_DTD_DIR = '......\\BMEcat\\version\\1.2'

    def __init__(self, dtd_dir=None):
        self.dtd_dir = self.DEFAULT_DTD_DIR if dtd_dir is None else dtd_dir

    def resolveEntity(self, publicId, systemId):
        """Return the local path for ``systemId``, or ``systemId`` unchanged.

        Args:
            publicId: public identifier of the entity (unused).
            systemId: system identifier requested by the parser; may be a
                bare file name, a path or a URL.

        Returns:
            The path of the same-named file inside ``dtd_dir`` when it
            exists, otherwise the original ``systemId`` so the parser falls
            back to its default lookup.
        """
        if not systemId:
            return systemId
        # Accept both separators: the id may be a URL or a Windows path.
        file_name = systemId.replace('\\', '/').rsplit('/', 1)[-1]
        local_path = os.path.join(self.dtd_dir, file_name)
        if file_name and os.path.isfile(local_path):
            return local_path
        return systemId
您是否知道如何跳过DOCTYPE验证,或者您是否看出我的验证代码中有什么错误?
提前谢谢