无需在XML中读取HTML

时间:2016-01-15 14:46:38

标签: java html xml

以下代码尝试读取XML并将元素名称和值转换为| sepeared Key | val pairs。

如果val包含HTML标记,则代码应返回val作为HTML AS IS

import java.io.ByteArrayInputStream;
import java.io.InputStream;

import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;

public class Example {


    public String invoke(String InputXML) throws Exception {

        String a = InputXML.replace("&lt;", "<");
        String xml = a.replace("&gt;", ">");
        InputStream in = new ByteArrayInputStream(xml.getBytes("utf-8"));
        Document doc = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder().parse(in);

        String Elements = "";
        String Values = "";
        String ElementValues = "";

        Node objects = doc.getDocumentElement();
        for (Node object = objects.getFirstChild(); object != null; object = object
                .getNextSibling()) {
            if (object instanceof Element) {
                Element e = (Element) object;

                if (Elements.length() == 0 && Values.length() == 0) {
                    String s1 = e.getTagName();
                    if (s1.contains(":")) {
                        String[] split = s1.split(":");
                        Elements = split[1];
                    } else {
                        Elements = s1;
                    }
                    Values = e.getTextContent();
                    ElementValues = Elements + "|" + e.getTextContent();
                } else {
                    String s2 = e.getTagName();
                    if (s2.startsWith("ns") && s2.contains(":")) {
                        String[] split = s2.split(":");
                        s2 = split[1];
                    } else {
                        s2 = s2;
                    }
                    Elements = Elements + "," + s2;
                    Values = Values + "," + e.getTextContent();
                    ElementValues = ElementValues + "," + s2 + "|"
                            + e.getTextContent();
                }
            }
        }

        return Elements + "\n" + ElementValues ;
    }

}

Test Codeprivate static void TestInp1Sample() {
        Example parseXML = new Example();
        try {
            String inp = "&lt;?xml version=\"1.0\" encoding=\"UTF-8\"?&gt;&lt;ns0:InputXML xmlns:ns0=\"http://www"
                    + "Application"
                    + "Schema/Schema.xsd\"&gt;&lt;ns0:Name&gt;VV,YY-AWUAH&lt;/ns0:Name&gt;&lt;"
                    + "ns0:MName&gt;  &lt;B&gt; ABCD &lt;/B&gt; EFGH            &lt;/ns0:MName&gt;"
                    + "&lt;ns0:LName&gt;__XXXX__&lt;/ns0:LName&gt;"
                    + "&lt;/ns0:InputXML&gt;";
            System.out.println(parseXML.invoke(inp));
        } catch (Exception e) {

            // TODO Auto-generated catch block
            e.printStackTrace();
        }

    }

**Expected response** 
Name,MName,LName
Name|LORRAINE,OFORI-AWUAH,MName|  <B> ABCD </B>  EFGH            ,LName|__XXXX__

**Actual**
Name,MName,LName
Name|LORRAINE,OFORI-AWUAH,MName|   ABCD  EFGH            ,LName|__XXXX__

任何解决方案? 我无法在 CDATA 中添加文字 并且不能使用其他java库

0 个答案:

没有答案