下面是我要从中解析数据的 XML 节点的示例格式:
<!-- /StationName/BACnetTemp/MNB_1_HX/HiPressureAlarm -->
<node name="HiPressureAlarm" class="tridium.control.BinaryInputNode" module="coreRuntime" release="2.301.535.v1">
<properties>
<position><x>576</x><y>866</y></position>
<timeDelay>
<duration>60</duration>
</timeDelay>
<eventEnable>
<toOffnormal>true</toOffnormal>
<toFault>false</toFault>
<toNormal>true</toNormal>
</eventEnable>
<alarmText>MCD Basement Re-Heat High Pressure Alarm</alarmText>
<changeOfStateTime>2018-05-07T08:55:04.09-4</changeOfStateTime>
<changeOfStateCount>848</changeOfStateCount>
<elapsedActiveTime>
<duration>126872</duration>
</elapsedActiveTime>
<activeInactiveText>
<active>Alarm</active>
<inactive>Normal</inactive>
</activeInactiveText>
<alarmValueEnabled>true</alarmValueEnabled>
</properties>
</node> <!-- HiPressureAlarm -->
开头的注释是该节点的路径,我正试图把这个节点中的某些数据导出到 Excel 文件。除了无法将路径与要提取数据的节点关联起来之外,其余部分都已经可以正常工作。
我可以使用以下代码把所有注释放进一个列表:
# Gather every XML comment node of the parsed document into one list.
comments = soup.find_all(text=lambda node: isinstance(node, Comment))
然后我尝试用下面的代码查找注释,并把它作为路径与节点关联:
# Look for the node's name inside each comment. Every hit overwrites the
# previous one, so the last comment containing "/<nodeName>" is kept.
x = '/' + nodeName
for comment in comments:
    if x in comment:
        nodePath = comment
问题在于,有多个节点名称相同但路径不同,结果这段代码给这些同名节点都分配了同一条路径。因此,我在 for 循环之后紧接着添加了以下代码:
# Take the comment that was just used out of the pool so that a later node
# with the same name cannot be given the same path again.
if nodePath in comments:
    comments.remove(nodePath)
这本应可行,但接下来的问题是:XML 中存在多个内容相同的注释,而且它们的顺序与查找节点及其数据的顺序不一致,因此路径无法与正确的节点对应。
有没有办法先找到节点,再把紧邻其前的那条注释赋给一个变量,然后导出到 Excel?
这是我解析数据的完整代码:
def alarms(self, soup):
    """Build a DataFrame of alarm settings, one row per alarm node.

    Every ``toOffnormal`` element in ``soup`` is located and its
    great-grandparent is treated as the node that owns the alarm settings.
    The node's path is looked up by name: all comments in the document are
    scanned for the text ``"/<node name>"`` and the last comment containing
    it is used, then removed from the pool of candidates.

    NOTE: this name-based lookup is ambiguous. A comment is matched by
    substring, so nodes that share a name (or whose name occurs inside
    another node's path) can be given the wrong path.

    Args:
        soup: Parsed document exposing the BeautifulSoup tree API.

    Returns:
        The resulting ``pandas.DataFrame``; it is also stored on ``self.df``.
    """
    columns = [
        'pointName', 'pointPath', 'timeDelay', 'toOffnormal', 'toFault',
        'toNormal', 'alarmText', 'highLimit', 'lowLimit', 'deadband',
        'lowLimitEnabled', 'highLimitEnabled',
    ]

    def text_of(node, tag, default, separator="| "):
        """Return the stripped text of the first ``tag`` under ``node``.

        ``default`` is returned when ``node`` has no such element.
        """
        found = node.find(tag)
        if found is None:
            return default
        return found.get_text(separator, strip=True)

    comments = soup.find_all(string=lambda text: isinstance(text, Comment))
    nodes = [alarm.parent.parent.parent
             for alarm in soup.find_all('toOffnormal')]

    rows = []
    for item in nodes:
        nodeName = item['name']

        # Start from None for every node so that a node without a matching
        # comment neither raises NameError nor inherits the previous path.
        nodePath = None
        needle = '/' + nodeName
        for comment in comments:
            if needle in comment:
                nodePath = comment
        if nodePath is not None:
            # Consume the comment so a later node cannot reuse it.
            comments.remove(nodePath)

        if item.find('eventEnable') is None:
            toOffnormal = toFault = toNormal = 'false'
        else:
            toOffnormal = text_of(item, 'toOffnormal', 'false')
            toFault = text_of(item, 'toFault', 'false')
            toNormal = text_of(item, 'toNormal', 'false')

        rows.append([
            nodeName,
            nodePath,
            # timeDelay joins its text pieces with "|" (no trailing space).
            text_of(item, 'timeDelay', '0', separator="|"),
            toOffnormal,
            toFault,
            toNormal,
            # A missing alarmText is reported as 'N/A' instead of crashing.
            text_of(item, 'alarmText', 'N/A'),
            text_of(item, 'highLimit', 'N/A'),
            text_of(item, 'lowLimit', 'N/A'),
            text_of(item, 'deadband', 'N/A'),
            text_of(item, 'lowLimitEnabled', 'false'),
            text_of(item, 'highLimitEnabled', 'false'),
        ])

    # Passing the column names to the constructor also works when no alarm
    # node was found and ``rows`` is empty.
    self.df = pandas.DataFrame(rows, columns=columns)
    return self.df
答案 0 :(得分:0)
我在函数的第 10 行(nodeName=item['name'])之后插入了 nodePath = item.previous_element.previous_element,从而实现了目标。最终代码如下:
def alarms(self, soup):
    """Build a DataFrame of alarm settings, one row per alarm node.

    Every ``toOffnormal`` element in ``soup`` is located and its
    great-grandparent is treated as the node that owns the alarm settings.
    The node's path is taken from the XML comment that directly precedes
    the node, which ties each path to its own node even when several nodes
    share the same name.

    Args:
        soup: Parsed document exposing the BeautifulSoup tree API.

    Returns:
        The resulting ``pandas.DataFrame``; it is also stored on ``self.df``.
        ``pointPath`` holds the comment text with surrounding whitespace
        removed, or ``None`` when the node has no preceding comment.
    """
    # Imported here so the method does not rely on a module-level import.
    from bs4 import Comment

    columns = [
        'pointName', 'pointPath', 'timeDelay', 'toOffnormal', 'toFault',
        'toNormal', 'alarmText', 'highLimit', 'lowLimit', 'deadband',
        'lowLimitEnabled', 'highLimitEnabled',
    ]

    def text_of(node, tag, default, separator="| "):
        """Return the stripped text of the first ``tag`` under ``node``.

        ``default`` is returned when ``node`` has no such element.
        """
        found = node.find(tag)
        if found is None:
            return default
        return found.get_text(separator, strip=True)

    def preceding_comment(node):
        """Return the text of the comment right before ``node``, or None.

        For the usual layout (comment, whitespace, node) this yields the
        same comment as ``node.previous_element.previous_element``, but it
        does not depend on exactly one text node sitting in between.
        """
        for sibling in node.previous_siblings:
            if isinstance(sibling, Comment):
                return sibling.strip()
            if not isinstance(sibling, str) or sibling.strip():
                # A tag or real text comes first: the node has no comment
                # of its own, so do not borrow one from further back.
                return None
        return None

    nodes = [alarm.parent.parent.parent
             for alarm in soup.find_all('toOffnormal')]

    rows = []
    for item in nodes:
        if item.find('eventEnable') is None:
            toOffnormal = toFault = toNormal = 'false'
        else:
            toOffnormal = text_of(item, 'toOffnormal', 'false')
            toFault = text_of(item, 'toFault', 'false')
            toNormal = text_of(item, 'toNormal', 'false')

        rows.append([
            item['name'],
            preceding_comment(item),
            # timeDelay joins its text pieces with "|" (no trailing space).
            text_of(item, 'timeDelay', '0', separator="|"),
            toOffnormal,
            toFault,
            toNormal,
            # A missing alarmText is reported as 'N/A' instead of crashing.
            text_of(item, 'alarmText', 'N/A'),
            text_of(item, 'highLimit', 'N/A'),
            text_of(item, 'lowLimit', 'N/A'),
            text_of(item, 'deadband', 'N/A'),
            text_of(item, 'lowLimitEnabled', 'false'),
            text_of(item, 'highLimitEnabled', 'false'),
        ])

    # Passing the column names to the constructor also works when no alarm
    # node was found and ``rows`` is empty.
    self.df = pandas.DataFrame(rows, columns=columns)
    return self.df