使用Pentaho解析XML数据

时间:2017-03-23 09:41:30

标签: xml pentaho pentaho-spoon pentaho-data-integration

我有以下xml数据。我需要“additionalList”下的数据直接作为属性。但是我无法使用get XML data步骤来完成它。

<?xml version="1.0"?>
<List>
<Local>
<name>XXUT</name>
<age>38</age>
<additionalList>
    <Info>
    <Key>NameID</Key>
    <Value>321</Value>
    </Info>
    <Info>
    <Key>uOiD</Key>
    <Value>21</Value>
    </Info>
    <Info>
    <Key>NameArrayKey</Key>
    <Value>(5,7,9,9)</Value>
    </Info>
</additionalList>
</Local>
</List>

1 个答案:

答案 0 :(得分:0)

您的xml错误,您正在使用Key关闭第一个additionalInfoKey标记。应该是这样的:

<?xml version="1.0"?>
<List>
<Local>
<name>XXUT</name>
<age>38</age>
<additionalList>
  <Info>
  <Key>NameID</Key>
  <Value>321</Value>
  </Info>
  <Info>
  <Key>NameArrayKey</Key>
  <Value>(5,7,9,9)</Value>
  </Info>
</additionalList>
</Local>
</List>

以下解决方案适合您,请参阅预览

的附加屏幕截图
    <?xml version="1.0" encoding="UTF-8"?>
<transformation>
  <info>
    <name>solve</name>
    <description/>
    <extended_description/>
    <trans_version/>
    <trans_type>Normal</trans_type>
    <directory>&#x2f;</directory>
    <parameters>
    </parameters>
    <log>
<trans-log-table><connection/>
<schema/>
<table/>
<size_limit_lines/>
<interval/>
<timeout_days/>
<field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field><field><id>TRANSNAME</id><enabled>Y</enabled><name>TRANSNAME</name></field><field><id>STATUS</id><enabled>Y</enabled><name>STATUS</name></field><field><id>LINES_READ</id><enabled>Y</enabled><name>LINES_READ</name><subject/></field><field><id>LINES_WRITTEN</id><enabled>Y</enabled><name>LINES_WRITTEN</name><subject/></field><field><id>LINES_UPDATED</id><enabled>Y</enabled><name>LINES_UPDATED</name><subject/></field><field><id>LINES_INPUT</id><enabled>Y</enabled><name>LINES_INPUT</name><subject/></field><field><id>LINES_OUTPUT</id><enabled>Y</enabled><name>LINES_OUTPUT</name><subject/></field><field><id>LINES_REJECTED</id><enabled>Y</enabled><name>LINES_REJECTED</name><subject/></field><field><id>ERRORS</id><enabled>Y</enabled><name>ERRORS</name></field><field><id>STARTDATE</id><enabled>Y</enabled><name>STARTDATE</name></field><field><id>ENDDATE</id><enabled>Y</enabled><name>ENDDATE</name></field><field><id>LOGDATE</id><enabled>Y</enabled><name>LOGDATE</name></field><field><id>DEPDATE</id><enabled>Y</enabled><name>DEPDATE</name></field><field><id>REPLAYDATE</id><enabled>Y</enabled><name>REPLAYDATE</name></field><field><id>LOG_FIELD</id><enabled>Y</enabled><name>LOG_FIELD</name></field><field><id>EXECUTING_SERVER</id><enabled>N</enabled><name>EXECUTING_SERVER</name></field><field><id>EXECUTING_USER</id><enabled>N</enabled><name>EXECUTING_USER</name></field><field><id>CLIENT</id><enabled>N</enabled><name>CLIENT</name></field></trans-log-table>
<perf-log-table><connection/>
<schema/>
<table/>
<interval/>
<timeout_days/>
<field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>SEQ_NR</id><enabled>Y</enabled><name>SEQ_NR</name></field><field><id>LOGDATE</id><enabled>Y</enabled><name>LOGDATE</name></field><field><id>TRANSNAME</id><enabled>Y</enabled><name>TRANSNAME</name></field><field><id>STEPNAME</id><enabled>Y</enabled><name>STEPNAME</name></field><field><id>STEP_COPY</id><enabled>Y</enabled><name>STEP_COPY</name></field><field><id>LINES_READ</id><enabled>Y</enabled><name>LINES_READ</name></field><field><id>LINES_WRITTEN</id><enabled>Y</enabled><name>LINES_WRITTEN</name></field><field><id>LINES_UPDATED</id><enabled>Y</enabled><name>LINES_UPDATED</name></field><field><id>LINES_INPUT</id><enabled>Y</enabled><name>LINES_INPUT</name></field><field><id>LINES_OUTPUT</id><enabled>Y</enabled><name>LINES_OUTPUT</name></field><field><id>LINES_REJECTED</id><enabled>Y</enabled><name>LINES_REJECTED</name></field><field><id>ERRORS</id><enabled>Y</enabled><name>ERRORS</name></field><field><id>INPUT_BUFFER_ROWS</id><enabled>Y</enabled><name>INPUT_BUFFER_ROWS</name></field><field><id>OUTPUT_BUFFER_ROWS</id><enabled>Y</enabled><name>OUTPUT_BUFFER_ROWS</name></field></perf-log-table>
<channel-log-table><connection/>
<schema/>
<table/>
<timeout_days/>
<field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field><field><id>LOG_DATE</id><enabled>Y</enabled><name>LOG_DATE</name></field><field><id>LOGGING_OBJECT_TYPE</id><enabled>Y</enabled><name>LOGGING_OBJECT_TYPE</name></field><field><id>OBJECT_NAME</id><enabled>Y</enabled><name>OBJECT_NAME</name></field><field><id>OBJECT_COPY</id><enabled>Y</enabled><name>OBJECT_COPY</name></field><field><id>REPOSITORY_DIRECTORY</id><enabled>Y</enabled><name>REPOSITORY_DIRECTORY</name></field><field><id>FILENAME</id><enabled>Y</enabled><name>FILENAME</name></field><field><id>OBJECT_ID</id><enabled>Y</enabled><name>OBJECT_ID</name></field><field><id>OBJECT_REVISION</id><enabled>Y</enabled><name>OBJECT_REVISION</name></field><field><id>PARENT_CHANNEL_ID</id><enabled>Y</enabled><name>PARENT_CHANNEL_ID</name></field><field><id>ROOT_CHANNEL_ID</id><enabled>Y</enabled><name>ROOT_CHANNEL_ID</name></field></channel-log-table>
<step-log-table><connection/>
<schema/>
<table/>
<timeout_days/>
<field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field><field><id>LOG_DATE</id><enabled>Y</enabled><name>LOG_DATE</name></field><field><id>TRANSNAME</id><enabled>Y</enabled><name>TRANSNAME</name></field><field><id>STEPNAME</id><enabled>Y</enabled><name>STEPNAME</name></field><field><id>STEP_COPY</id><enabled>Y</enabled><name>STEP_COPY</name></field><field><id>LINES_READ</id><enabled>Y</enabled><name>LINES_READ</name></field><field><id>LINES_WRITTEN</id><enabled>Y</enabled><name>LINES_WRITTEN</name></field><field><id>LINES_UPDATED</id><enabled>Y</enabled><name>LINES_UPDATED</name></field><field><id>LINES_INPUT</id><enabled>Y</enabled><name>LINES_INPUT</name></field><field><id>LINES_OUTPUT</id><enabled>Y</enabled><name>LINES_OUTPUT</name></field><field><id>LINES_REJECTED</id><enabled>Y</enabled><name>LINES_REJECTED</name></field><field><id>ERRORS</id><enabled>Y</enabled><name>ERRORS</name></field><field><id>LOG_FIELD</id><enabled>N</enabled><name>LOG_FIELD</name></field></step-log-table>
<metrics-log-table><connection/>
<schema/>
<table/>
<timeout_days/>
<field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field><field><id>LOG_DATE</id><enabled>Y</enabled><name>LOG_DATE</name></field><field><id>METRICS_DATE</id><enabled>Y</enabled><name>METRICS_DATE</name></field><field><id>METRICS_CODE</id><enabled>Y</enabled><name>METRICS_CODE</name></field><field><id>METRICS_DESCRIPTION</id><enabled>Y</enabled><name>METRICS_DESCRIPTION</name></field><field><id>METRICS_SUBJECT</id><enabled>Y</enabled><name>METRICS_SUBJECT</name></field><field><id>METRICS_TYPE</id><enabled>Y</enabled><name>METRICS_TYPE</name></field><field><id>METRICS_VALUE</id><enabled>Y</enabled><name>METRICS_VALUE</name></field></metrics-log-table>
    </log>
    <maxdate>
      <connection/>
      <table/>
      <field/>
      <offset>0.0</offset>
      <maxdiff>0.0</maxdiff>
    </maxdate>
    <size_rowset>10000</size_rowset>
    <sleep_time_empty>50</sleep_time_empty>
    <sleep_time_full>50</sleep_time_full>
    <unique_connections>N</unique_connections>
    <feedback_shown>Y</feedback_shown>
    <feedback_size>50000</feedback_size>
    <using_thread_priorities>Y</using_thread_priorities>
    <shared_objects_file/>
    <capture_step_performance>N</capture_step_performance>
    <step_performance_capturing_delay>1000</step_performance_capturing_delay>
    <step_performance_capturing_size_limit>100</step_performance_capturing_size_limit>
    <dependencies>
    </dependencies>
    <partitionschemas>
    </partitionschemas>
    <slaveservers>
    </slaveservers>
    <clusterschemas>
    </clusterschemas>
  <created_user>-</created_user>
  <created_date>2017&#x2f;03&#x2f;23 16&#x3a;14&#x3a;46.076</created_date>
  <modified_user>-</modified_user>
  <modified_date>2017&#x2f;03&#x2f;23 16&#x3a;14&#x3a;46.076</modified_date>
    <key_for_session_key>H4sIAAAAAAAAAAMAAAAAAAAAAAA&#x3d;</key_for_session_key>
    <is_key_private>N</is_key_private>
  </info>
  <notepads>
  </notepads>
  <order>
  <hop> <from>Get data from XML</from><to>Split Fields</to><enabled>Y</enabled> </hop>
  <hop> <from>Split Fields</from><to>Select values</to><enabled>Y</enabled> </hop>
  </order>
  <step>
    <name>Get data from XML</name>
    <type>getXMLData</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
         <partitioning>
           <method>none</method>
           <schema_name/>
           </partitioning>
    <include>N</include>
    <include_field/>
    <rownum>N</rownum>
    <addresultfile>N</addresultfile>
    <namespaceaware>N</namespaceaware>
    <ignorecomments>N</ignorecomments>
    <readurl>N</readurl>
    <validating>N</validating>
    <usetoken>N</usetoken>
    <IsIgnoreEmptyFile>N</IsIgnoreEmptyFile>
    <doNotFailIfNoFile>Y</doNotFailIfNoFile>
    <rownum_field/>
    <encoding>UTF-8</encoding>
    <file>
      <name>C&#x3a;&#x5c;Users&#x5c;Desktop&#x5c;stackOverflow.xml</name>
      <filemask/>
      <exclude_filemask/>
      <file_required>N</file_required>
      <include_subfolders>N</include_subfolders>
    </file>
    <fields>
      <field>
        <name>name</name>
        <xpath>name</xpath>
        <element_type>node</element_type>
        <result_type>valueof</result_type>
        <type>String</type>
        <format/>
        <currency/>
        <decimal/>
        <group/>
        <length>-1</length>
        <precision>-1</precision>
        <trim_type>none</trim_type>
        <repeat>N</repeat>
      </field>
      <field>
        <name>age</name>
        <xpath>age</xpath>
        <element_type>node</element_type>
        <result_type>valueof</result_type>
        <type>Integer</type>
        <format/>
        <currency/>
        <decimal/>
        <group/>
        <length>-1</length>
        <precision>-1</precision>
        <trim_type>none</trim_type>
        <repeat>N</repeat>
      </field>
      <field>
        <name>Key</name>
        <xpath>additionalList&#x2f;Info&#x2f;Key</xpath>
        <element_type>node</element_type>
        <result_type>valueof</result_type>
        <type>String</type>
        <format/>
        <currency/>
        <decimal/>
        <group/>
        <length>-1</length>
        <precision>-1</precision>
        <trim_type>none</trim_type>
        <repeat>N</repeat>
      </field>
      <field>
        <name>Value</name>
        <xpath>additionalList&#x2f;Info&#x2f;Value</xpath>
        <element_type>node</element_type>
        <result_type>valueof</result_type>
        <type>Integer</type>
        <format/>
        <currency/>
        <decimal/>
        <group/>
        <length>-1</length>
        <precision>-1</precision>
        <trim_type>none</trim_type>
        <repeat>N</repeat>
      </field>
      <field>
        <name>additionalList</name>
        <xpath>additionalList</xpath>
        <element_type>node</element_type>
        <result_type>valueof</result_type>
        <type>String</type>
        <format/>
        <currency/>
        <decimal/>
        <group/>
        <length>-1</length>
        <precision>-1</precision>
        <trim_type>none</trim_type>
        <repeat>N</repeat>
      </field>
    </fields>
    <limit>0</limit>
    <loopxpath>&#x2f;List&#x2f;Local</loopxpath>
    <IsInFields>N</IsInFields>
    <IsAFile>Y</IsAFile>
    <XmlField>dummy</XmlField>
    <prunePath/>
    <shortFileFieldName/>
    <pathFieldName/>
    <hiddenFieldName/>
    <lastModificationTimeFieldName/>
    <uriNameFieldName/>
    <rootUriNameFieldName/>
    <extensionFieldName/>
    <sizeFieldName/>
     <cluster_schema/>
 <remotesteps>   <input>   </input>   <output>   </output> </remotesteps>    <GUI>
      <xloc>352</xloc>
      <yloc>160</yloc>
      <draw>Y</draw>
      </GUI>
    </step>

  <step>
    <name>Select values</name>
    <type>SelectValues</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
         <partitioning>
           <method>none</method>
           <schema_name/>
           </partitioning>
    <fields>      <field>        <name>name</name>
        <rename/>
        <length>-2</length>
        <precision>-2</precision>
      </field>      <field>        <name>age</name>
        <rename/>
        <length>-2</length>
        <precision>-2</precision>
      </field>      <field>        <name>Key</name>
        <rename/>
        <length>-2</length>
        <precision>-2</precision>
      </field>      <field>        <name>Value</name>
        <rename/>
        <length>-2</length>
        <precision>-2</precision>
      </field>      <field>        <name>Key2</name>
        <rename/>
        <length>-2</length>
        <precision>-2</precision>
      </field>      <field>        <name>Value2</name>
        <rename/>
        <length>-2</length>
        <precision>-2</precision>
      </field>      <field>        <name>Key4</name>
        <rename/>
        <length>-2</length>
        <precision>-2</precision>
      </field>      <field>        <name>Value4</name>
        <rename/>
        <length>-2</length>
        <precision>-2</precision>
      </field>      <field>        <name>Key6</name>
        <rename/>
        <length>-2</length>
        <precision>-2</precision>
      </field>      <field>        <name>Value6</name>
        <rename/>
        <length>-2</length>
        <precision>-2</precision>
      </field>        <select_unspecified>N</select_unspecified>
    </fields>     <cluster_schema/>
 <remotesteps>   <input>   </input>   <output>   </output> </remotesteps>    <GUI>
      <xloc>736</xloc>
      <yloc>160</yloc>
      <draw>Y</draw>
      </GUI>
    </step>

  <step>
    <name>Split Fields</name>
    <type>FieldSplitter</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
         <partitioning>
           <method>none</method>
           <schema_name/>
           </partitioning>
   <splitfield>additionalList</splitfield>
   <delimiter>&#x24;&#x5b;0A&#x5d;</delimiter>
   <enclosure/>
    <fields>      <field>        <name>key1</name>
        <id/>
        <idrem>N</idrem>
        <type>String</type>
        <format/>
        <group/>
        <decimal/>
        <currency/>
        <length>-1</length>
        <precision>-1</precision>
        <nullif/>
        <ifnull/>
        <trimtype>both</trimtype>
      </field>      <field>        <name>Value1</name>
        <id/>
        <idrem>N</idrem>
        <type>String</type>
        <format/>
        <group/>
        <decimal/>
        <currency/>
        <length>-1</length>
        <precision>-1</precision>
        <nullif/>
        <ifnull/>
        <trimtype>both</trimtype>
      </field>      <field>        <name>Key2</name>
        <id/>
        <idrem>N</idrem>
        <type>String</type>
        <format/>
        <group/>
        <decimal/>
        <currency/>
        <length>-1</length>
        <precision>-1</precision>
        <nullif/>
        <ifnull/>
        <trimtype>both</trimtype>
      </field>      <field>        <name>Value2</name>
        <id/>
        <idrem>N</idrem>
        <type>String</type>
        <format/>
        <group/>
        <decimal/>
        <currency/>
        <length>-1</length>
        <precision>-1</precision>
        <nullif/>
        <ifnull/>
        <trimtype>both</trimtype>
      </field>      <field>        <name>Key3</name>
        <id/>
        <idrem>N</idrem>
        <type>String</type>
        <format/>
        <group/>
        <decimal/>
        <currency/>
        <length>-1</length>
        <precision>-1</precision>
        <nullif/>
        <ifnull/>
        <trimtype>both</trimtype>
      </field>      <field>        <name>Value3</name>
        <id/>
        <idrem>N</idrem>
        <type>String</type>
        <format/>
        <group/>
        <decimal/>
        <currency/>
        <length>-1</length>
        <precision>-1</precision>
        <nullif/>
        <ifnull/>
        <trimtype>both</trimtype>
      </field>      <field>        <name>Key4</name>
        <id/>
        <idrem>N</idrem>
        <type>String</type>
        <format/>
        <group/>
        <decimal/>
        <currency/>
        <length>-1</length>
        <precision>-1</precision>
        <nullif/>
        <ifnull/>
        <trimtype>both</trimtype>
      </field>      <field>        <name>Value4</name>
        <id/>
        <idrem>N</idrem>
        <type>String</type>
        <format/>
        <group/>
        <decimal/>
        <currency/>
        <length>-1</length>
        <precision>-1</precision>
        <nullif/>
        <ifnull/>
        <trimtype>both</trimtype>
      </field>      <field>        <name>Key5</name>
        <id/>
        <idrem>N</idrem>
        <type>String</type>
        <format/>
        <group/>
        <decimal/>
        <currency/>
        <length>-1</length>
        <precision>-1</precision>
        <nullif/>
        <ifnull/>
        <trimtype>both</trimtype>
      </field>      <field>        <name>Value5</name>
        <id/>
        <idrem>N</idrem>
        <type>String</type>
        <format/>
        <group/>
        <decimal/>
        <currency/>
        <length>-1</length>
        <precision>-1</precision>
        <nullif/>
        <ifnull/>
        <trimtype>both</trimtype>
      </field>      <field>        <name>Key6</name>
        <id/>
        <idrem>N</idrem>
        <type>String</type>
        <format/>
        <group/>
        <decimal/>
        <currency/>
        <length>-1</length>
        <precision>-1</precision>
        <nullif/>
        <ifnull/>
        <trimtype>both</trimtype>
      </field>      <field>        <name>Value6</name>
        <id/>
        <idrem>N</idrem>
        <type>String</type>
        <format/>
        <group/>
        <decimal/>
        <currency/>
        <length>-1</length>
        <precision>-1</precision>
        <nullif/>
        <ifnull/>
        <trimtype>both</trimtype>
      </field>    </fields>     <cluster_schema/>
 <remotesteps>   <input>   </input>   <output>   </output> </remotesteps>    <GUI>
      <xloc>512</xloc>
      <yloc>160</yloc>
      <draw>Y</draw>
      </GUI>
    </step>

  <step_error_handling>
  </step_error_handling>
   <slave-step-copy-partition-distribution>
</slave-step-copy-partition-distribution>
   <slave_transformation>N</slave_transformation>

</transformation>