XML到CSV转换Java

时间:2016-09-27 09:56:09

标签: java xml csv xslt

我正致力于将XML转换为CSV数据。通过查看各种示例,我能够编写用于解析XML文件和获取CSV文件的代码。但是,我编写的代码返回的CSV文件没有显示XML文件中存在的所有标记。

我有一个用于转换的XSLT。我是XSLT的新手,所以我认为问题出在我的XSLT上。

这是Java代码:

package com.adarsh.conversions;

import java.io.File;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.w3c.dom.Document;

    /**
     * Command-line utility that converts {@code sample_data.xml} to CSV by applying
     * the XSLT stylesheet {@code style.xsl} and writing the result to {@code /tmp/x.csv}.
     *
     * <p>Both input files are resolved relative to the current working directory.
     */
    class XMLtoCsVConversion {

        /**
         * Parses the XML input into a DOM tree, compiles the stylesheet and runs the
         * transformation, writing the output file.
         *
         * @param args ignored
         * @throws Exception if the XML cannot be parsed, the stylesheet cannot be
         *                   compiled, or the transformation fails
         */
        public static void main(String args[]) throws Exception {
            File stylesheet = new File("style.xsl");
            File xmlSource = new File("sample_data.xml");

            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            // XSLT requires namespace support, but the factory default is NOT
            // namespace-aware; a DOM built without it may transform incorrectly.
            factory.setNamespaceAware(true);
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document document = builder.parse(xmlSource);

            StreamSource stylesource = new StreamSource(stylesheet);
            Transformer transformer = TransformerFactory.newInstance()
                    .newTransformer(stylesource);
            Source source = new DOMSource(document);
            Result outputTarget = new StreamResult(new File("/tmp/x.csv"));
            transformer.transform(source, outputTarget);
        }
    }

这是我正在使用的XSLT:

<?xml version="1.0" encoding="utf-8"?>
<!--
  Generic element-to-CSV stylesheet (text output).
  It reads only element names and element text content; no attribute (@...)
  is selected anywhere in this stylesheet.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="text"/>

  <!--
    Entry point. Writes a header line made of the names of the child elements
    of the first child of the document element, then emits one row for every
    child of the document element.
  -->
  <xsl:template match="/">
    <xsl:for-each select="*/*[1]/*">
      <xsl:value-of select="name()" />
      <!-- comma after every header name except the last one -->
      <xsl:if test="not(position() = last())">,</xsl:if>
    </xsl:for-each>
    <xsl:text>&#10;</xsl:text>
    <xsl:apply-templates select="*/*" mode="row"/>
  </xsl:template>

  <!-- One CSV row: each child element becomes a field, followed by a newline. -->
  <xsl:template match="*" mode="row">
    <xsl:apply-templates select="*" mode="data" />
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <!--
    One CSV field. contains(text(), ',') tests the first text-node child only
    (node-set to string conversion). When it holds a comma, that text is
    wrapped in double quotes with embedded quotes doubled; otherwise the
    element's full string value is written unquoted.
  -->
  <xsl:template match="*" mode="data">
    <xsl:choose>
      <xsl:when test="contains(text(),',')">
        <xsl:text>&quot;</xsl:text>
        <xsl:call-template name="doublequotes">
          <xsl:with-param name="text" select="text()" />
        </xsl:call-template>
        <xsl:text>&quot;</xsl:text>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="." />
      </xsl:otherwise>
    </xsl:choose>
    <!-- field separator after every field except the last one in the row -->
    <xsl:if test="position() != last()">,</xsl:if>
  </xsl:template>

  <!--
    Recursively writes $text with every double quote replaced by two double
    quotes: emits the part before the first quote plus a doubled quote, then
    recurses on the remainder until no quote is left.
  -->
  <xsl:template name="doublequotes">
    <xsl:param name="text" />
    <xsl:choose>
      <xsl:when test="contains($text,'&quot;')">
        <xsl:value-of select="concat(substring-before($text,'&quot;'),'&quot;&quot;')" />
        <xsl:call-template name="doublequotes">
          <xsl:with-param name="text" select="substring-after($text,'&quot;')" />
        </xsl:call-template>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="$text" />
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>
</xsl:stylesheet>

这是我尝试转换为CSV的XML文件:

<?xml version="1.0"?>

<school id="100" name="WGen School">

    <grade id="1">
        <classroom id="101" name="Mrs. Jones' Math Class">
            <teacher id="10100000001" first_name="Barbara" last_name="Jones"/>

            <student id="10100000010" first_name="Michael" last_name="Gil"/>
            <student id="10100000011" first_name="Kimberly" last_name="Gutierrez"/>
            <student id="10100000013" first_name="Toby" last_name="Mercado"/>
            <student id="10100000014" first_name="Lizzie" last_name="Garcia"/>
            <student id="10100000015" first_name="Alex" last_name="Cruz"/>
        </classroom>


        <classroom id="102" name="Mr. Smith's PhysEd Class">
            <teacher id="10200000001" first_name="Arthur" last_name="Smith"/>
            <teacher id="10200000011" first_name="John" last_name="Patterson"/>

            <student id="10200000010" first_name="Nathaniel" last_name="Smith"/>
            <student id="10200000011" first_name="Brandon" last_name="McCrancy"/>
            <student id="10200000012" first_name="Elizabeth" last_name="Marco"/>
            <student id="10200000013" first_name="Erica" last_name="Lanni"/>
            <student id="10200000014" first_name="Michael" last_name="Flores"/>
            <student id="10200000015" first_name="Jasmin" last_name="Hill"/>
            <student id="10200000016" first_name="Brittany" last_name="Perez"/>
            <student id="10200000017" first_name="William" last_name="Hiram"/>
            <student id="10200000018" first_name="Alexis" last_name="Reginald"/>
            <student id="10200000019" first_name="Matthew" last_name="Gayle"/>
        </classroom>

        <classroom id="103" name="Brian's Homeroom">
            <teacher id="10300000001" first_name="Brian" last_name="O'Donnell"/>
        </classroom>
    </grade>
</school>

期待的是:

classroom id, classroom_name, teacher_1_id, teacher_1_last_name, teacher_1_first_name, teacher_2_id, teacher_2_last_name, teacher_2_first_name, student_id, student_last_name, student_first_name, grade
101, Mrs. Jones' Math Class, 10100000001, Jones, Barbara, , , , 10100000010, Gil, Michael, 2
101, Mrs. Jones' Math Class, 10100000001, Jones, Barbara, , , , 10100000011, Gutierrez, Kimberly, 2
101, Mrs. Jones' Math Class, 10100000001, Jones, Barbara, , , , 10100000013, Mercado, Toby, 1
101, Mrs. Jones' Math Class, 10100000001, Jones, Barbara, , , , 10100000014, Garcia, Lizzie, 1
101, Mrs. Jones' Math Class, 10100000001, Jones, Barbara, , , , 10100000015, Cruz, Alex, 1
102, Mr. Smith's PhysEd Class, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000010, Smith, Nathaniel, 1
102, Mr. Smith's PhysEd Class, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000011, McCrancy, Brandon, 1
102, Mr. Smith's PhysEd Class, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000012, Marco, Elizabeth, 1
102, Mr. Smith's PhysEd Class, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000013, Lanni, Erica, 1
102, Mr. Smith's PhysEd Class, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000014, Flores, Michael, 1
102, Mr. Smith's PhysEd Class, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000015, Hill, Jasmin, 1
102, Mr. Smith's PhysEd Class, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000016, Perez, Brittany, 1
102, Mr. Smith's PhysEd Class, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000017, Hiram, William, 1
102, Mr. Smith's PhysEd Class, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000018, Reginald, Alexis, 1
102, Mr. Smith's PhysEd Class, 10200000001, Smith, Arthur, 10200000011, Patterson, John, 10200000019, Gayle, Matthew, 1
103, Brian's Homeroom, 10300000001, O'Donnell, Brian, , , , , , ,

但是我只得到了:

  classroom,classroom,classroom

有人可以帮我解决这个问题吗?

P.S. 我已经参考了stackoverflow上有关CSV到XML转换的其他问题,并使用这些帖子中提供的信息来帮助我创建XSL。

1 个答案:

答案 0 :(得分:2)

我建议你以此为出发点:

XSLT 1.0

<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text" encoding="UTF-8"/>

<!--
  Flattens school/grade/classroom/student into CSV: one line per student,
  each prefixed with its classroom's id and name and the attributes of the
  classroom's first two teachers, and suffixed with the grade id.
  A classroom without any student element produces no line.
-->
<xsl:template match="/school">
    <!-- fixed header line -->
    <xsl:text>classroom id,classroom_name,teacher_1_id,teacher_1_last_name,teacher_1_first_name,teacher_2_id,teacher_2_last_name,teacher_2_first_name,student_id,student_last_name,student_first_name,grade&#10;</xsl:text>
    <xsl:for-each select="grade/classroom">
        <!-- id of the enclosing grade, captured before the context changes -->
        <xsl:variable name="grade" select="../@id"/>
        <!-- columns shared by every student of this classroom; a missing
             teacher contributes empty strings, so the column count is stable -->
        <xsl:variable name="prefix"
            select="concat(@id, ',', @name, ',',
                           teacher[1]/@id, ',', teacher[1]/@last_name, ',', teacher[1]/@first_name, ',',
                           teacher[2]/@id, ',', teacher[2]/@last_name, ',', teacher[2]/@first_name, ',')"/>
        <xsl:for-each select="student">
            <xsl:value-of select="$prefix"/>
            <!-- per-student columns followed by the grade -->
            <xsl:value-of select="concat(@id, ',', @last_name, ',', @first_name, ',', $grade)"/>
            <xsl:text>&#10;</xsl:text>
        </xsl:for-each>
    </xsl:for-each>
</xsl:template>

</xsl:stylesheet>

应用于您的输入,结果将是:

classroom id,classroom_name,teacher_1_id,teacher_1_last_name,teacher_1_first_name,teacher_2_id,teacher_2_last_name,teacher_2_first_name,student_id,student_last_name,student_first_name,grade
101,Mrs. Jones' Math Class,10100000001,Jones,Barbara,,,,10100000010,Gil,Michael,1
101,Mrs. Jones' Math Class,10100000001,Jones,Barbara,,,,10100000011,Gutierrez,Kimberly,1
101,Mrs. Jones' Math Class,10100000001,Jones,Barbara,,,,10100000013,Mercado,Toby,1
101,Mrs. Jones' Math Class,10100000001,Jones,Barbara,,,,10100000014,Garcia,Lizzie,1
101,Mrs. Jones' Math Class,10100000001,Jones,Barbara,,,,10100000015,Cruz,Alex,1
102,Mr. Smith's PhysEd Class,10200000001,Smith,Arthur,10200000011,Patterson,John,10200000010,Smith,Nathaniel,1
102,Mr. Smith's PhysEd Class,10200000001,Smith,Arthur,10200000011,Patterson,John,10200000011,McCrancy,Brandon,1
102,Mr. Smith's PhysEd Class,10200000001,Smith,Arthur,10200000011,Patterson,John,10200000012,Marco,Elizabeth,1
102,Mr. Smith's PhysEd Class,10200000001,Smith,Arthur,10200000011,Patterson,John,10200000013,Lanni,Erica,1
102,Mr. Smith's PhysEd Class,10200000001,Smith,Arthur,10200000011,Patterson,John,10200000014,Flores,Michael,1
102,Mr. Smith's PhysEd Class,10200000001,Smith,Arthur,10200000011,Patterson,John,10200000015,Hill,Jasmin,1
102,Mr. Smith's PhysEd Class,10200000001,Smith,Arthur,10200000011,Patterson,John,10200000016,Perez,Brittany,1
102,Mr. Smith's PhysEd Class,10200000001,Smith,Arthur,10200000011,Patterson,John,10200000017,Hiram,William,1
102,Mr. Smith's PhysEd Class,10200000001,Smith,Arthur,10200000011,Patterson,John,10200000018,Reginald,Alexis,1
102,Mr. Smith's PhysEd Class,10200000001,Smith,Arthur,10200000011,Patterson,John,10200000019,Gayle,Matthew,1

请注意,这假设您的输入字段不包含逗号或双引号。