给定下面这个XML文件,我想从中提取数据。但是,从<LandmarkPointListXml>开始的部分我无法提取。
XML文件:
<?xml version="1.0" encoding="utf-8"?>
<Map xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<MapName>er</MapName>
<MapURL>er.gif</MapURL>
<Name>er</Name>
<URL>er.gif</URL>
<LandmarkPointListXml>
<anyType xsi:type="LandmarkPointProperty">
<LandmarkPointX>400</LandmarkPointX>
<LandmarkPointY>292</LandmarkPointY>
<LandmarkDesc>my room door</LandmarkDesc>
</anyType>
<anyType xsi:type="LandmarkPointProperty">
<LandmarkPointX>399</LandmarkPointX>
<LandmarkPointY>219</LandmarkPointY>
<LandmarkDesc>bro room door</LandmarkDesc>
</anyType>
</LandmarkPointListXml>
<RegionPointListXml />
</Map>
Python程序:
# Build one delimited string from the children of the document's first child:
# every ordinary child contributes "<text>," and the LandmarkPointListXml child
# is delegated to loopLandmark, wrapped in '|' on both sides.
def GetMapData(self):
result = ""
haha = self.XMLdoc.firstChild #root node
for child in haha.childNodes:
if (cmp(child.nodeName,'LandmarkPointListXml')==0):
# NOTE(review): this passes the element's childNodes list, not the element
# itself, while loopLandmark calls getElementsByTagName on its argument --
# presumably the point where extraction stops; confirm.
result = result + '|' + self.loopLandmark(child.childNodes) + '|'
else:
# NOTE(review): assumes every other child has a first child carrying a value;
# a child without one (e.g. an empty element) would have firstChild None -- verify.
result = result + child.firstChild.nodeValue + ','
return result
# Intended to append three comma-separated values per landmark to one string
# (three firstChild.nodeValue reads follow the LandmarkPointX check below).
def loopLandmark(self, landmarks):
result=""
# NOTE(review): the visible caller passes child.childNodes here; presumably a
# node list that does not offer getElementsByTagName -- confirm.
haha=landmarks.getElementsByTagName('anyType')
# NOTE(review): haha is the result of getElementsByTagName, so reading
# .childNodes / .firstChild from it looks unintended -- verify.
for child in haha.childNodes:
if (cmp(haha.firstChild.nodeName,'LandmarkPointX') == 0):
result=result+child.firstChild.nodeValue+','
# NOTE(review): ChildNode is never assigned in the visible code, and the
# values are read from `child`, so these nextSibling steps do not change
# which node is read next.
ChildNode = ChildNode.nextSibling
result=result+child.firstChild.nodeValue+','
ChildNode = ChildNode.nextSibling
result=result+child.firstChild.nodeValue
return result
我能够得到结果"er,er.gif,er,er.gif,",但程序执行到<LandmarkPointListXml>时就无法继续了。
答案 0 :(得分:2)
这段代码非常脆弱。它对XML输入做了很强的假设,一旦XML以某种仍然合法的方式被修改(例如<LandmarkPointY>不再紧跟在<LandmarkPointX>之后),它就会失败。
我建议在解析XML时使用标准库,例如元素树(http://docs.python.org/library/xml.etree.elementtree.html)或lxml(http://lxml.de),它们也可以验证您的XML输入。
我在下面编写的代码使用了ElementTree,可以处理你的XML输入(我去掉了方法的'self'参数,使其成为独立函数)。它还能容忍(忽略)XML元素中的空值。
import xml.etree.ElementTree as ET
def GetMapData( xmlfile ):
    """Flatten a <Map> XML document into one delimited string.

    Every direct child of the root that carries text contributes
    ``text,``; the ``LandmarkPointListXml`` child is handed to
    ``loopLandmark`` and wrapped in ``|`` on both sides.  Children
    without text (such as an empty ``<RegionPointListXml />``) are skipped.

    ``xmlfile`` is a filename or file object accepted by ``ET.parse``.

    Returns the assembled string, or ``""`` when the input cannot be read
    or parsed (a message is printed in that case).
    """
    try:
        tree = ET.parse( xmlfile )
    except (IOError, ET.ParseError) as e:
        # Without a parsed tree there is nothing to walk.  The original code
        # fell through here and then failed on the unbound name `tree`.
        print("Failure Parsing %s: %s" % (xmlfile, e))
        return ""

    parts = []
    for child in tree.getroot():
        if child.tag == 'LandmarkPointListXml':
            parts.append('|' + loopLandmark(child) + '|')
        elif child.text is not None:
            parts.append(child.text + ',')
    return "".join(parts)
def loopLandmark( landmarks ):
    """Collect the X/Y coordinates of every <anyType> child of *landmarks*.

    Each non-empty ``LandmarkPointX`` / ``LandmarkPointY`` text is emitted
    followed by a comma, in document order.  Children with any other tag,
    and coordinate elements without text, are ignored.  The xsi:type
    attribute of <anyType> is not inspected.
    """
    wanted = ( 'LandmarkPointX', 'LandmarkPointY' )
    pieces = []
    for landmark in landmarks:
        if landmark.tag != 'anyType':
            continue
        pieces.extend(
            field.text + ','
            for field in landmark
            if field.text and field.tag in wanted
        )
    return "".join(pieces)
# Example call against the file 'xml.in'; the returned string is neither
# printed nor stored here.
GetMapData( 'xml.in' )
答案 1 :(得分:0)
我设法从所贴的XML文件中提取出了数据。不过我觉得应该有比我这个答案更简单的做法:要取得每一项数据,需要进行大量循环。
import sys
import socket
import os
from xml.dom.minidom import Document, parse, parseString
class mapDataClass:
    """Load a <Map> XML file with minidom and flatten it to a delimited string."""

    def __init__(self):
        # Start with a document that holds only a <Map> root, so XMLdoc and
        # RootNode are usable before any file has been loaded.
        self.XMLdoc = Document()
        self.MakeRootNode()

    def MakeRootNode(self):
        """Create the <Map> root element and attach it to the document."""
        self.RootNode = self.XMLdoc.createElement('Map')
        self.XMLdoc.appendChild(self.RootNode)

    def GetXML_Doc(self):
        """Return the underlying minidom document."""
        return self.XMLdoc

    def LoadXMLFile(self, AbsFileName):
        """Parse AbsFileName and adopt it as the current document.

        Returns True only when the file could be read and its root element
        is <Map>; otherwise returns False.  An unreadable file also prints a
        message.  Parser errors other than IOError are not caught here.
        """
        try:
            new_doc = parse(AbsFileName)
        except IOError:
            print('File ' + AbsFileName + ' not found')
            return False
        # Replace the old document only after a successful parse, so a failed
        # load leaves the previous document usable.
        self.XMLdoc.unlink()
        self.XMLdoc = new_doc
        root = new_doc.documentElement
        if root is not None and root.nodeName == 'Map':
            self.RootNode = root
            return True
        return False

    @staticmethod
    def _node_text(node):
        """Return the value of node's first child, or "" when there is none."""
        first = node.firstChild if node is not None else None
        if first is None or first.nodeValue is None:
            return ""
        return first.nodeValue

    def GetMapData(self):
        """Return "<scalars>|<landmarks>EMPTY" for the loaded document.

        Scalar children of the root are joined with ','; the URL element is
        appended without a trailing comma.  The landmark section comes from
        loopLandmark.  The literal "EMPTY" is always appended, and 'Empty' is
        printed when a RegionPointListXml element is encountered.
        """
        result = ""
        landmark_part = ""  # stays empty when there is no landmark list
        root = self.XMLdoc.documentElement
        children = root.childNodes if root is not None else []
        for child in children:
            # Whitespace between elements shows up as text nodes; they carry
            # no data and have no firstChild, so skip them.
            if child.nodeType != child.ELEMENT_NODE:
                continue
            name = child.nodeName
            if name == 'LandmarkPointListXml':
                landmark_part = self.loopLandmark(child)
            elif name == 'RegionPointListXml':
                print('Empty')
            elif name == 'URL':
                result += self._node_text(child)
            else:
                result += self._node_text(child) + ','
        return result + "|" + landmark_part + "EMPTY"

    def loopLandmark(self, landmarks):
        """Serialise every <anyType> element found under landmarks.

        Entries are separated by ';' and the list is terminated by '|'.
        Returns "" when there are no <anyType> elements.
        """
        entries = [self.loopAnyType(node)
                   for node in landmarks.getElementsByTagName('anyType')]
        if not entries:
            return ""
        return ";".join(entries) + "|"

    def loopAnyType(self, anyType):
        """Return "X,Y,Desc" for one <anyType> landmark element.

        A missing or empty field contributes an empty string.
        """
        fields = []
        for tag in ('LandmarkPointX', 'LandmarkPointY', 'LandmarkDesc'):
            matches = anyType.getElementsByTagName(tag)
            fields.append(self._node_text(matches[0] if matches else None))
        return ",".join(fields)
# Demo: load a map file, print the load status, then print the flattened data.
profile = mapDataClass()
# A raw string keeps the backslash literal instead of relying on '\e' not
# being a recognised escape sequence.
boolean = profile.LoadXMLFile(r'upload\er.m')
print(boolean)
result = profile.GetMapData()
print(result)
答案 2 :(得分:0)
我之前的回答还不完整。下面是我认为应该没问题的版本。
import sys
import socket
import os
from xml.dom.minidom import Document, parse, parseString, Node
class mapDataClass:
    """Load a <Map> XML file with minidom and flatten it to a delimited string."""

    def __init__(self):
        # Start with a document that holds only a <Map> root, so XMLdoc and
        # RootNode are usable before any file has been loaded.
        self.XMLdoc = Document()
        self.MakeRootNode()

    def MakeRootNode(self):
        """Create the <Map> root element and attach it to the document."""
        self.RootNode = self.XMLdoc.createElement('Map')
        self.XMLdoc.appendChild(self.RootNode)

    def GetXML_Doc(self):
        """Return the underlying minidom document."""
        return self.XMLdoc

    def LoadXMLFile(self, AbsFileName):
        """Parse AbsFileName and adopt it as the current document.

        Returns True only when the file could be read and its root element
        is <Map>; otherwise returns False.  An unreadable file also prints a
        message.  Parser errors other than IOError are not caught here.
        """
        try:
            new_doc = parse(AbsFileName)
        except IOError:
            print('File ' + AbsFileName + ' not found')
            return False
        # Replace the old document only after a successful parse, so a failed
        # load leaves the previous document usable.
        self.XMLdoc.unlink()
        self.XMLdoc = new_doc
        root = new_doc.documentElement
        if root is not None and root.nodeName == 'Map':
            self.RootNode = root
            return True
        return False

    @staticmethod
    def _node_text(node):
        """Return the value of node's first child, or "" when there is none."""
        first = node.firstChild if node is not None else None
        if first is None or first.nodeValue is None:
            return ""
        return first.nodeValue

    @staticmethod
    def _has_element_children(node):
        """Tell whether node has at least one element (non-text) child."""
        return any(sub.nodeType == Node.ELEMENT_NODE for sub in node.childNodes)

    def GetMapData(self):
        """Return "<scalars>|<landmarks><regions>" for the loaded document.

        Scalar children of the root are joined with ','; the URL element is
        appended without a trailing comma.  The landmark section comes from
        loopLandmark, or is 'EMPTY|' when the list holds no landmarks.  The
        region section is 'EMPTY' when RegionPointListXml has no element
        children; region contents are not extracted.
        """
        scalars = ""
        landmark_part = ""
        region_part = ""
        root = self.XMLdoc.documentElement
        children = root.childNodes if root is not None else []
        for child in children:
            # Only elements carry data; whitespace text nodes are skipped.
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            name = child.nodeName
            if name == 'LandmarkPointListXml':
                # loopLandmark yields "" when there is no <anyType>, which also
                # covers a list that contains nothing but whitespace.
                landmark_part = self.loopLandmark(child) or 'EMPTY|'
            elif name == 'RegionPointListXml':
                if not self._has_element_children(child):
                    region_part = 'EMPTY'
            elif name == 'URL':
                scalars += self._node_text(child)
            else:
                scalars += self._node_text(child) + ','
        return scalars + "|" + landmark_part + region_part

    def loopLandmark(self, landmarks):
        """Serialise every <anyType> element found under landmarks.

        Entries are separated by ';' and the list is terminated by '|'.
        Returns "" when there are no <anyType> elements.
        """
        entries = [self.loopAnyType(node)
                   for node in landmarks.getElementsByTagName('anyType')]
        if not entries:
            return ""
        return ";".join(entries) + "|"

    def loopAnyType(self, anyType):
        """Return "X,Y,Desc" for one <anyType> landmark element.

        A missing or empty field contributes an empty string.
        """
        fields = []
        for tag in ('LandmarkPointX', 'LandmarkPointY', 'LandmarkDesc'):
            matches = anyType.getElementsByTagName(tag)
            fields.append(self._node_text(matches[0] if matches else None))
        return ",".join(fields)
# Demo: load a map file and print its flattened data when loading succeeds.
data = mapDataClass()
# A raw string keeps the backslash literal instead of relying on '\h' not
# being a recognised escape sequence.
success = data.LoadXMLFile(r"upload\homeTest.m")
if success:
    print("file loaded")
    print(data.GetMapData())
else:
    print("no such file found")