使用JAVA将CSV转换为XML

时间:2012-08-25 07:24:07

标签: java xml csv

我有一组要转换为XML的CSV数据。代码看起来不错,但输出不够完美。它省略了一些列,因为它们没有任何值,并且产生了很长的XML数据而不是破坏它。

这是我的CSV数据示例:

Name  Age Sex
chi   23   
kay   19  male
John      male

我的代码:

public class XMLCreators {
  // Protected Properties
  protected DocumentBuilderFactory domFactory = null;
  protected DocumentBuilder domBuilder = null;

  public XMLCreators() {
    try {
      domFactory = DocumentBuilderFactory.newInstance();
      domBuilder = domFactory.newDocumentBuilder();
    } catch (FactoryConfigurationError exp) {
      System.err.println(exp.toString());
    } catch (ParserConfigurationException exp) {
      System.err.println(exp.toString());
    } catch (Exception exp) {
      System.err.println(exp.toString());
    }

  }

  public int convertFile(String csvFileName, String xmlFileName,
      String delimiter) {

    int rowsCount = -1;
    try {
      Document newDoc = domBuilder.newDocument();
      // Root element
      Element rootElement = newDoc.createElement("XMLCreators");
      newDoc.appendChild(rootElement);
      // Read csv file
      BufferedReader csvReader;
      csvReader = new BufferedReader(new FileReader(csvFileName));
      int fieldCount = 0;
      String[] csvFields = null;
      StringTokenizer stringTokenizer = null;

      // Assumes the first line in CSV file is column/field names
      // The column names are used to name the elements in the XML file,
      // avoid the use of Space or other characters not suitable for XML element
      // naming

      String curLine = csvReader.readLine();
      if (curLine != null) {
        // how about other form of csv files?
        stringTokenizer = new StringTokenizer(curLine, delimiter);
        fieldCount = stringTokenizer.countTokens();
        if (fieldCount > 0) {
          csvFields = new String[fieldCount];
          int i = 0;
          while (stringTokenizer.hasMoreElements())
            csvFields[i++] = String.valueOf(stringTokenizer.nextElement());
        }
      }

      // At this point the coulmns are known, now read data by lines
      while ((curLine = csvReader.readLine()) != null) {
        stringTokenizer = new StringTokenizer(curLine, delimiter);
        fieldCount = stringTokenizer.countTokens();
        if (fieldCount > 0) {
          Element rowElement = newDoc.createElement("row");
          int i = 0;
          while (stringTokenizer.hasMoreElements()) {
            try {
              String curValue = String.valueOf(stringTokenizer.nextElement());
              Element curElement = newDoc.createElement(csvFields[i++]);
              curElement.appendChild(newDoc.createTextNode(curValue));
              rowElement.appendChild(curElement);
            } catch (Exception exp) {
            }
          }
          rootElement.appendChild(rowElement);
          rowsCount++;
        }
      }
      csvReader.close();

      // Save the document to the disk file
      TransformerFactory tranFactory = TransformerFactory.newInstance();
      Transformer aTransformer = tranFactory.newTransformer();
      Source src = new DOMSource(newDoc);
      Result result = new StreamResult(new File(xmlFileName));
      aTransformer.transform(src, result);
      rowsCount++;

      // Output to console for testing
      // Resultt result = new StreamResult(System.out);

    } catch (IOException exp) {
      System.err.println(exp.toString());
    } catch (Exception exp) {
      System.err.println(exp.toString());
    }
    return rowsCount;
    // "XLM Document has been created" + rowsCount;
  }
}

当对上述数据执行此代码时,它会产生:

<?xml version="1.0" encoding="UTF-8"?>
<XMLCreators>
<row>
<Name>chi</Name>
<Age>23</Age>
</row>
<row>
<Name>kay</Name>
<Age>19</Age>
<sex>male</sex>
</row>
<row>
<Name>john</Name>
<Age>male</Age>
</row>
</XMLCreators>

我自己以这种形式安排了它,但输出产生了一条长线。要生成的输出应为:

<?xml version="1.0" encoding="UTF-8"?>
<XMLCreators>
<row>
<Name>chi</Name>
<Age>23</Age>
<sex></sex>
</row>
<row>
<Name>kay</Name>
<Age>19</Age>
<sex>male</sex>
</row>
<row>
<Name>john</Name>
<Age></Age>
<sex>male</sex>
 </row>
 </XMLCreators>

3 个答案:

答案 0 :(得分:2)

我同意肯尼特的意见。

我只是添加了

aTransformer .setOutputProperty(OutputKeys.INDENT, "yes");
aTransformer .setOutputProperty(OutputKeys.METHOD, "xml");
aTransformer .setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

这在元素之间添加了一条新行,并允许缩进。

<强>已更新

让我们首先看一下你所呈现的文件不是CSV(逗号分隔值)文件,我会让你担心这个问题......

List<String> headers = new ArrayList<String>(5);

File file = new File("Names2.csv");
BufferedReader reader = null;

try {

    DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder domBuilder = domFactory.newDocumentBuilder();

    Document newDoc = domBuilder.newDocument();
    // Root element
    Element rootElement = newDoc.createElement("XMLCreators");
    newDoc.appendChild(rootElement);

    reader = new BufferedReader(new FileReader(file));
    int line = 0;

    String text = null;
    while ((text = reader.readLine()) != null) {

        StringTokenizer st = new StringTokenizer(text, " ", false);    
        String[] rowValues = new String[st.countTokens()];
        int index = 0;
        while (st.hasMoreTokens()) {

            String next = st.nextToken();
            rowValues[index++] = next;

        }

        //String[] rowValues = text.split(",");

        if (line == 0) { // Header row
            for (String col : rowValues) {
                headers.add(col);
            }
        } else { // Data row
            Element rowElement = newDoc.createElement("row");
            rootElement.appendChild(rowElement);
            for (int col = 0; col < headers.size(); col++) {
                String header = headers.get(col);
                String value = null;

                if (col < rowValues.length) {
                    value = rowValues[col];
                } else {
                    // ?? Default value
                    value = "";
                }

                Element curElement = newDoc.createElement(header);
                curElement.appendChild(newDoc.createTextNode(value));
                rowElement.appendChild(curElement);
            }
        }
        line++;
    }

    ByteArrayOutputStream baos = null;
    OutputStreamWriter osw = null;

    try {
        baos = new ByteArrayOutputStream();
        osw = new OutputStreamWriter(baos);

        TransformerFactory tranFactory = TransformerFactory.newInstance();
        Transformer aTransformer = tranFactory.newTransformer();
        aTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
        aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
        aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

        Source src = new DOMSource(newDoc);
        Result result = new StreamResult(osw);
        aTransformer.transform(src, result);

        osw.flush();
        System.out.println(new String(baos.toByteArray()));
    } catch (Exception exp) {
        exp.printStackTrace();
    } finally {
        try {
            osw.close();
        } catch (Exception e) {
        }
        try {
            baos.close();
        } catch (Exception e) {
        }
    }
} catch (Exception e) {
    e.printStackTrace();
}

现在我在这里使用了List而不是Map。您需要决定如何最好地解决缺失值问题。事先不知道文件的结构,这不是一个简单的解决方案。

任何方式,我最终都是

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<XMLCreators>
    <row>
        <Name>chi</Name>
        <Age>23</Age>
        <Sex/>
    </row>
    <row>
        <Name>kay</Name>
        <Age>19</Age>
        <Sex>male</Sex>
    </row>
    <row>
        <Name>John</Name>
        <Age>male</Age>
        <Sex/>
    </row>
</XMLCreators>

已更新合并

public class XMLCreators {
    // Protected Properties

    protected DocumentBuilderFactory domFactory = null;
    protected DocumentBuilder domBuilder = null;

    public XMLCreators() {
        try {
            domFactory = DocumentBuilderFactory.newInstance();
            domBuilder = domFactory.newDocumentBuilder();
        } catch (FactoryConfigurationError exp) {
            System.err.println(exp.toString());
        } catch (ParserConfigurationException exp) {
            System.err.println(exp.toString());
        } catch (Exception exp) {
            System.err.println(exp.toString());
        }

    }

    public int convertFile(String csvFileName, String xmlFileName,
                    String delimiter) {

        int rowsCount = -1;
        try {
            Document newDoc = domBuilder.newDocument();
            // Root element
            Element rootElement = newDoc.createElement("XMLCreators");
            newDoc.appendChild(rootElement);
            // Read csv file
            BufferedReader csvReader;
            csvReader = new BufferedReader(new FileReader(csvFileName));

//                int fieldCount = 0;
//                String[] csvFields = null;
//                StringTokenizer stringTokenizer = null;
//
//                // Assumes the first line in CSV file is column/field names
//                // The column names are used to name the elements in the XML file,
//                // avoid the use of Space or other characters not suitable for XML element
//                // naming
//
//                String curLine = csvReader.readLine();
//                if (curLine != null) {
//                    // how about other form of csv files?
//                    stringTokenizer = new StringTokenizer(curLine, delimiter);
//                    fieldCount = stringTokenizer.countTokens();
//                    if (fieldCount > 0) {
//                        csvFields = new String[fieldCount];
//                        int i = 0;
//                        while (stringTokenizer.hasMoreElements()) {
//                            csvFields[i++] = String.valueOf(stringTokenizer.nextElement());
//                        }
//                    }
//                }
//
//                // At this point the coulmns are known, now read data by lines
//                while ((curLine = csvReader.readLine()) != null) {
//                    stringTokenizer = new StringTokenizer(curLine, delimiter);
//                    fieldCount = stringTokenizer.countTokens();
//                    if (fieldCount > 0) {
//                        Element rowElement = newDoc.createElement("row");
//                        int i = 0;
//                        while (stringTokenizer.hasMoreElements()) {
//                            try {
//                                String curValue = String.valueOf(stringTokenizer.nextElement());
//                                Element curElement = newDoc.createElement(csvFields[i++]);
//                                curElement.appendChild(newDoc.createTextNode(curValue));
//                                rowElement.appendChild(curElement);
//                            } catch (Exception exp) {
//                            }
//                        }
//                        rootElement.appendChild(rowElement);
//                        rowsCount++;
//                    }
//                }
//                csvReader.close();
//
//                // Save the document to the disk file
//                TransformerFactory tranFactory = TransformerFactory.newInstance();
//                Transformer aTransformer = tranFactory.newTransformer();
//                Source src = new DOMSource(newDoc);
//                Result result = new StreamResult(new File(xmlFileName));
//                aTransformer.transform(src, result);
//                rowsCount++;
            int line = 0;
            List<String> headers = new ArrayList<String>(5);

            String text = null;
            while ((text = csvReader.readLine()) != null) {

                StringTokenizer st = new StringTokenizer(text, delimiter, false);
                String[] rowValues = new String[st.countTokens()];
                int index = 0;
                while (st.hasMoreTokens()) {

                    String next = st.nextToken();
                    rowValues[index++] = next;

                }

                if (line == 0) { // Header row

                    for (String col : rowValues) {
                        headers.add(col);
                    }

                } else { // Data row

                    rowsCount++;

                    Element rowElement = newDoc.createElement("row");
                    rootElement.appendChild(rowElement);
                    for (int col = 0; col < headers.size(); col++) {

                        String header = headers.get(col);
                        String value = null;

                        if (col < rowValues.length) {

                            value = rowValues[col];

                        } else {
                            // ?? Default value
                            value = "";
                        }

                        Element curElement = newDoc.createElement(header);
                        curElement.appendChild(newDoc.createTextNode(value));
                        rowElement.appendChild(curElement);

                    }

                }
                line++;

            }

            ByteArrayOutputStream baos = null;
            OutputStreamWriter osw = null;

            try {

                baos = new ByteArrayOutputStream();
                osw = new OutputStreamWriter(baos);

                TransformerFactory tranFactory = TransformerFactory.newInstance();
                Transformer aTransformer = tranFactory.newTransformer();
                aTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
                aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
                aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

                Source src = new DOMSource(newDoc);
                Result result = new StreamResult(osw);
                aTransformer.transform(src, result);

                osw.flush();
                System.out.println(new String(baos.toByteArray()));

            } catch (Exception exp) {
                exp.printStackTrace();
            } finally {
                try {
                    osw.close();
                } catch (Exception e) {
                }
                try {
                    baos.close();
                } catch (Exception e) {
                }
            }

            // Output to console for testing
            // Resultt result = new StreamResult(System.out);

        } catch (IOException exp) {
            System.err.println(exp.toString());
        } catch (Exception exp) {
            System.err.println(exp.toString());
        }
        return rowsCount;
        // "XLM Document has been created" + rowsCount;
    }
}

使用OpenCSV更新

public class XMLCreators {
    // Protected Properties

    protected DocumentBuilderFactory domFactory = null;
    protected DocumentBuilder domBuilder = null;

    public XMLCreators() {
        try {
            domFactory = DocumentBuilderFactory.newInstance();
            domBuilder = domFactory.newDocumentBuilder();
        } catch (FactoryConfigurationError exp) {
            System.err.println(exp.toString());
        } catch (ParserConfigurationException exp) {
            System.err.println(exp.toString());
        } catch (Exception exp) {
            System.err.println(exp.toString());
        }

    }

    public int convertFile(String csvFileName, String xmlFileName,
                    String delimiter) {

        int rowsCount = -1;
        BufferedReader csvReader;
        try {
            Document newDoc = domBuilder.newDocument();
            // Root element
            Element rootElement = newDoc.createElement("XMLCreators");
            newDoc.appendChild(rootElement);
            // Read csv file
            csvReader = new BufferedReader(new FileReader(csvFileName));

            //** Now using the OpenCSV **//
            CSVReader reader = new CSVReader(new FileReader("names.csv"), delimiter.charAt(0));
            //CSVReader reader = new CSVReader(csvReader);
            String[] nextLine;
            int line = 0;
            List<String> headers = new ArrayList<String>(5);
            while ((nextLine = reader.readNext()) != null) {

                if (line == 0) { // Header row
                    for (String col : nextLine) {
                        headers.add(col);
                    }
                } else { // Data row
                    Element rowElement = newDoc.createElement("row");
                    rootElement.appendChild(rowElement);

                    int col = 0;
                    for (String value : nextLine) {
                        String header = headers.get(col);

                        Element curElement = newDoc.createElement(header);
                        curElement.appendChild(newDoc.createTextNode(value.trim()));
                        rowElement.appendChild(curElement);

                        col++;
                    }
                }
                line++;
            }
            //** End of CSV parsing**//

            FileWriter writer = null;

            try {

                writer = new FileWriter(new File(xmlFileName));

                TransformerFactory tranFactory = TransformerFactory.newInstance();
                Transformer aTransformer = tranFactory.newTransformer();
                aTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
                aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
                aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

                Source src = new DOMSource(newDoc);
                Result result = new StreamResult(writer);
                aTransformer.transform(src, result);

                writer.flush();

            } catch (Exception exp) {
                exp.printStackTrace();
            } finally {
                try {
                    writer.close();
                } catch (Exception e) {
                }
            }

            // Output to console for testing
            // Resultt result = new StreamResult(System.out);

        } catch (IOException exp) {
            System.err.println(exp.toString());
        } catch (Exception exp) {
            System.err.println(exp.toString());
        }
        return rowsCount;
        // "XLM Document has been created" + rowsCount;
    }
}

答案 1 :(得分:0)

public int convertFile(String csvFileName, String xmlFileName, String delimiter) {

    int rowsCount = -1;
    try {
        Document newDoc = domBuilder.newDocument();
        // Root element
        Element rootElement = newDoc.createElement("XMLCreators");
        newDoc.appendChild(rootElement);
        // Read csv file
        BufferedReader csvReader;
        csvReader = new BufferedReader(new FileReader(csvFileName));
        int fieldCount = 0;
        String[] csvFields = null;
        String[] csvValues = null;


        // Assumes the first line in CSV file is column/field names
        // The column names are used to name the elements in the XML file,
        // avoid the use of Space or other characters not suitable for XML element
        // naming

        String curLine = csvReader.readLine();
        if (curLine != null) {
            csvFields=curLine.split(",", -1);
            }

        // At this point the coulmns are known, now read data by lines

        int i =0;
        while ((curLine = csvReader.readLine()) != null) {
            csvValues=curLine.split(",", -1);
            Element rowElement = newDoc.createElement("record-"+i);
            try {
            for(fieldCount=0;fieldCount<csvValues.length;fieldCount++)
            {
                String curValue = csvValues[fieldCount];
                Element curElement = newDoc.createElement(csvFields[fieldCount]);
                curElement.appendChild(newDoc.createTextNode(curValue));
                rowElement.appendChild(curElement);
            }
            } catch (Exception exp) {
            }
            rootElement.appendChild(rowElement);
            i++;

        }

答案 2 :(得分:0)

package org.mycompany.conversion;

import java.util.List;

import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;

import org.apache.camel.dataformat.bindy.annotation.CsvRecord;
import org.apache.camel.dataformat.bindy.annotation.DataField;

@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
@CsvRecord(separator = ",")
public class Person {

    @DataField(pos = 1)
    @XmlElement
    private int id;
    @DataField(pos = 2)
    @XmlElement
    private String name;
    @DataField(pos = 3)
    @XmlElement
    private int age;



    @Override
    public String toString() {
        return "Person [id=" + id + ", name=" + name + ", age=" + age + "]";
    }

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }

}

=========================================================================================package org.mycompany.conversion;

import java.util.List;

import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
import javax.xml.bind.annotation.XmlRootElement;

@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
public class Greeting {

    @XmlElementWrapper(name = "Person")
    @XmlElement(name = "persons")
    private List<Person> persons;

    public List<Person> getPerson() {
        return persons;
    }

    public void setPerson(List<Person> persons) {
        this.persons = persons;
    }

}

=========================================================================================
package org.mycompany.conversion;

import javax.xml.bind.JAXBContext;

import org.apache.camel.CamelContext;
import org.apache.camel.Exchange;
import org.apache.camel.Processor;
import org.apache.camel.builder.RouteBuilder;
import org.apache.camel.converter.jaxb.JaxbDataFormat;
import org.apache.camel.dataformat.bindy.csv.BindyCsvDataFormat;
import org.apache.camel.impl.DefaultCamelContext;
import org.apache.camel.spi.DataFormat;

public class CsvConversion {

    public static void main(String[] args) throws Exception {

        JaxbDataFormat xmlDataFormat = new JaxbDataFormat();
        JAXBContext con = JAXBContext.newInstance(Greeting.class);
        xmlDataFormat.setContext(con);
        DataFormat bindy = new BindyCsvDataFormat(Person.class);
        CamelContext context = new DefaultCamelContext();
        context.addRoutes(new RouteBuilder() {
            @Override
            public void configure() throws Exception {

                from("file:src/main/data/in/csv?noop=true").split().tokenize("\n").unmarshal(bindy)
                        // constanr(true):-aggregate all using same expression
                        .aggregate(constant(true), new AttachAggregation())
                        //mandatory after aggregate
                        .completionTimeout(100)//end after this gives problem
                        .marshal(xmlDataFormat).log("xml body is ${body}")
                        .to("file:src/main/data/in/xml?fileName=convertedXml.xml");// .aggregate(new
                                                                                    // AttachAggreagation());
            }
        });
        context.start();
        Thread.sleep(5000);
    }
}
===================================================================================
package org.mycompany.conversion;

import java.util.ArrayList;
import java.util.List;

import org.apache.camel.Exchange;
import org.apache.camel.processor.aggregate.AggregationStrategy;

public class AttachAggregation implements AggregationStrategy {

    List<Person> list = new ArrayList();
    Greeting gre = new Greeting();

    @Override
//person-address
    // greeting-user
    public Exchange aggregate(Exchange oldExchange, Exchange newExchange) {

        if (oldExchange == null) {
            Person newBody1 = newExchange.getIn().getBody(Person.class);
            list.add(newBody1);
            return newExchange;
        } else {
            Person newBody2 = newExchange.getIn().getBody(Person.class);
            list.add(newBody2);

            gre.setPerson(list);
            oldExchange.getIn().setBody(gre);
            return oldExchange;
        }

    }

}