如何使用R和XML库检索XML节点的文本值

时间:2015-07-21 19:31:48

标签: xml r xpath

我想使用R检索XML值"Business Object"。

XML文件是一个yEd图[1]。该文件如下所示

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<graphml xmlns="http://graphml.graphdrawing.org/xmlns" xmlns:java="http://www.yworks.com/xml/yfiles-common/1.0/java" xmlns:sys="http://www.yworks.com/xml/yfiles-common/markup/primitives/2.0" xmlns:x="http://www.yworks.com/xml/yfiles-common/markup/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:y="http://www.yworks.com/xml/graphml" xmlns:yed="http://www.yworks.com/xml/yed/3" xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns http://www.yworks.com/xml/schema/graphml/1.1/ygraphml.xsd">
  <!--Created by yEd 3.14.2-->
  <key attr.name="Description" attr.type="string" for="graph" id="d0"/>
  <key for="port" id="d1" yfiles.type="portgraphics"/>
  <key for="port" id="d2" yfiles.type="portgeometry"/>
  <key for="port" id="d3" yfiles.type="portuserdata"/>
  <key attr.name="url" attr.type="string" for="node" id="d4"/>
  <key attr.name="description" attr.type="string" for="node" id="d5"/>
  <key for="node" id="d6" yfiles.type="nodegraphics"/>
  <key for="graphml" id="d7" yfiles.type="resources"/>
  <key attr.name="url" attr.type="string" for="edge" id="d8"/>
  <key attr.name="description" attr.type="string" for="edge" id="d9"/>
  <key for="edge" id="d10" yfiles.type="edgegraphics"/>
  <graph edgedefault="directed" id="G">
    <data key="d0"/>
    <node id="n0">
      <data key="d5"/>
      <data key="d6">
        <y:GenericNode configuration="com.yworks.bpmn.Artifact.withShadow">
          <y:Geometry height="55.0" width="35.0" x="282.5" y="152.5"/>
          <y:Fill color="#FFFFFFE6" transparent="false"/>
          <y:BorderStyle color="#000000" type="line" width="1.0"/>
          <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.701171875" modelName="custom" textColor="#000000" visible="true" width="90.70703125" x="-27.853515625" y="-22.701171875">Business Object<y:LabelModel>
              <y:SmartNodeLabelModel distance="4.0"/>
            </y:LabelModel>
            <y:ModelParameter>
              <y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.5" nodeRatioX="0.0" nodeRatioY="-0.5" offsetX="0.0" offsetY="-4.0" upX="0.0" upY="-1.0"/>
            </y:ModelParameter>
          </y:NodeLabel>
          <y:StyleProperties>
            <y:Property class="java.awt.Color" name="com.yworks.bpmn.icon.line.color" value="#000000"/>
            <y:Property class="java.awt.Color" name="com.yworks.bpmn.icon.fill2" value="#d4d4d4cc"/>
            <y:Property class="java.awt.Color" name="com.yworks.bpmn.icon.fill" value="#ffffffe6"/>
            <y:Property class="com.yworks.yfiles.bpmn.view.BPMNTypeEnum" name="com.yworks.bpmn.type" value="ARTIFACT_TYPE_DATA_OBJECT"/>
            <y:Property class="com.yworks.yfiles.bpmn.view.DataObjectTypeEnum" name="com.yworks.bpmn.dataObjectType" value="DATA_OBJECT_TYPE_PLAIN"/>
          </y:StyleProperties>
        </y:GenericNode>
      </data>
    </node>
  </graph>
  <data key="d7">
    <y:Resources/>
  </data>
</graphml>

使用xmlSpy我可以使用xpath表达式//y:NodeLabel来检索结果Business Object

R代码如下

# Load the XML package, which provides xmlParse() and XPath subsetting.
library(XML)

# Directory that holds the yEd GraphML export.
path <- "./Data"

# Locate the yEd file. `pattern` is a regular expression, so the dot is
# escaped and the end of the name is anchored; otherwise names such as
# "SingleBO.graphml.bak" or "SingleBOxgraphml" would match as well.
file.names <- dir(path, pattern = "SingleBO\\.graphml$", full.names = TRUE)

# Fail early with a clear message instead of trying to parse an NA path.
if (length(file.names) == 0) {
  stop("No SingleBO.graphml file found in ", path, call. = FALSE)
}

# Parse the first matching file into an XML document.
data <- xmlParse(file.names[1])

# XPath expression: select the <y:NodeLabel> node (printed with its markup).
data[["//y:NodeLabel"]]

data[["//y:NodeLabel"]]的结果如下所示

<y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.701171875" modelName="custom" textColor="#000000" visible="true" width="90.70703125" x="-27.853515625" y="-22.701171875">Business Object<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel>
        <y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.5" nodeRatioX="0.0" nodeRatioY="-0.5" offsetX="0.0" offsetY="-4.0" upX="0.0" upY="-1.0"/></y:ModelParameter>
</y:NodeLabel> 

我需要做什么才能只检索到结果Business Object?

非常感谢所有人的帮助。 约翰

[1] http://www.yworks.com/en/products_yed_download.html

2 个答案:

答案 0 :(得分:2)

最简单的方法就是使用xmlValue()

# Pull the matched node out first, then read its text content.
label_node <- data[["//y:NodeLabel"]]
xmlValue(label_node)
# [1] "Business Object\n            \n          "

这样会保留一些可能不需要的空格和换行符。您可以设置trim=TRUE来清理结果:

# Same lookup, with leading/trailing whitespace stripped from the text.
label_node <- data[["//y:NodeLabel"]]
xmlValue(label_node, trim = TRUE)
# [1] "Business Object"

答案 1 :(得分:1)

或者,使用xml2包:

library(xml2) # the answer's author used the GitHub version
library(stringr)

# Parse the GraphML file (replace YOURXMLFILE with its path).
dat <- read_xml(YOURXMLFILE)

# Collect the document's namespaces so the "y:" prefix resolves in XPath,
# then find the label nodes, take their text and trim the whitespace.
ns <- xml_ns(dat)
labels <- xml_find_all(dat, "//y:NodeLabel", ns)
str_trim(xml_text(labels))
## [1] "Business Object"

如果你喜欢使用管道:

library(magrittr)
library(xml2)
library(stringr)

# Parse the GraphML file (replace YOURXMLFILE with its path).
dat <- read_xml(YOURXMLFILE)

# Same extraction as a pipeline: find nodes -> take text -> trim whitespace.
dat %>%
  xml_find_all(xpath = "//y:NodeLabel", ns = xml_ns(dat)) %>%
  xml_text() %>%
  str_trim()
## [1] "Business Object"

对较大文件的测试:

# Larger sample document, read directly from its URL.
demo_url <- "http://docs.yworks.com/graphml/demo/yext/graphml/resources/custom/demo.graphml"
dat <- read_xml(demo_url)

# Find every node label, then extract and trim its text.
label_nodes <- xml_find_all(dat, "//y:NodeLabel", xml_ns(dat))
label_nodes %>%
  xml_text() %>%
  str_trim()
## [1] "1" "2" "3"