因此,对于一个项目,我必须编写一个类,该类接受带有参数nameSpaceID,articleID,title,一组字符串的多个对象“Page”,然后将它们输出到xml文件中。我试图通过使用XMLOutputFactory和XMLStreamWriter来解决它,将xml写入StringWriter,然后将StringWriter转换为正确的格式(indent和stuff),最后将其输出到.xml文件中。到目前为止一切都有效,但如果我放一个>我需要帮助逃避特殊字符。例如在我的fileName中,它不会被转义。我尝试使用StringEscapeUtils.escapeXml10(字符串)转义它,但这只会使我的输出更糟。
import java.io.FileOutputStream;
import org.apache.commons.lang3.StringEscapeUtils;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.HashSet;
import java.util.Set;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
/**
*
*/
/**
* @author Paul
*
*/
public class PageExport {
/**
* @param args
*/
public void printPagestoXML(Page[] pages, String fileName, String filePath){
try {
StringWriter xmlRAW = new StringWriter();
XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newFactory();
xmlOutputFactory.setProperty("escapeCharacters", false);
XMLStreamWriter xmlStreamWriter = xmlOutputFactory.createXMLStreamWriter(xmlRAW);
xmlStreamWriter.writeStartDocument("UTF-8", "1.0");
xmlStreamWriter.writeStartElement("pages");
for(int i = 0; i < pages.length; i++){
xmlStreamWriter.writeStartElement("page");
xmlStreamWriter.writeAttribute("pageID", pages[i].getArticleID() + "");
xmlStreamWriter.writeAttribute("namespaceID", pages[i].getNamespaceID() + "");
xmlStreamWriter.writeAttribute("title", StringEscapeUtils.escapeXml10(pages[i].getTitle()));
if (pages[i].getCategories() != null){
xmlStreamWriter.writeStartElement("categories");
for(int j = 0; j < pages[i].getCategories().size(); j++) {
xmlStreamWriter.writeEmptyElement("category");
xmlStreamWriter.writeAttribute("name", pages[i].getCategories().toArray()[j].toString());
}
xmlStreamWriter.writeEndElement(); //end of categories
}
xmlStreamWriter.writeEndElement(); //end of page i
}
xmlStreamWriter.writeEndElement(); //end of pages
xmlStreamWriter.writeEndDocument(); // end of document
xmlStreamWriter.flush();
xmlStreamWriter.close();
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "yes");
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
StreamResult streamResult = new StreamResult(new FileOutputStream(filePath + fileName));
transformer.transform(new StreamSource(new StringReader(xmlRAW.getBuffer().toString())), streamResult);
}
catch (Exception e){
System.out.println(e.getMessage());
}
}
public static void main(String[] args) {
String goodFilePath = System.getProperty("user.dir") + "/src/data/";
String goodFileName = "test.xml";
Set<String> testCategories = new HashSet<String>();
testCategories.add("this");
testCategories.add("is");
testCategories.add("sparta");
Page[] testPages = {new Page(0, 1337, "l33t", testCategories), new Page(0, 1338, "l33t>", testCategories)};
PageExport pe = new PageExport();
pe.printPagestoXML(testPages, goodFileName, goodFilePath);
}
}
此代码的输出(第二页标题是重要的):
<?xml version="1.0" encoding="UTF-8"?>
<pages>
<page pageID="1337" namespaceID="0" title="l33t">
<categories>
<category name="this"/>
<category name="is"/>
<category name="sparta"/>
</categories>
</page>
<page pageID="1338" namespaceID="0" title="l33t&gt;">
<categories>
<category name="this"/>
<category name="is"/>
<category name="sparta"/>
</categories>
</page>
</pages>
没有StringEscapeUtils.escapeXml10(标题):
<?xml version="1.0" encoding="UTF-8"?>
<pages>
<page pageID="1337" namespaceID="0" title="l33t">
<categories>
<category name="this"/>
<category name="is"/>
<category name="sparta"/>
</categories>
</page>
<page pageID="1338" namespaceID="0" title="l33t>">
<categories>
<category name="this"/>
<category name="is"/>
<category name="sparta"/>
</categories>
</page>
</pages>
我想要的是什么:
<?xml version="1.0" encoding="UTF-8"?>
<pages>
<page pageID="1337" namespaceID="0" title="l33t">
<categories>
<category name="this"/>
<category name="is"/>
<category name="sparta"/>
</categories>
</page>
<page pageID="1338" namespaceID="0" title="l33t>">
<categories>
<category name="this"/>
<category name="is"/>
<category name="sparta"/>
</categories>
</page>
</pages>
编辑:我通过将DOCTYPE_PUBLIC设置为“是”修复了该问题,新代码:
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.apache.log4j.Logger;
/**
* @author Paul
*
*/
public class PageExport {
Logger log = Logger.getLogger(PageExport.class);
/**
* Converts a collection of Pages into a XML String and then into a XML file.
*
* @param pages The collection or Pages, that shall be written into the file.
* @param filepath The full path of the XML file.
* @see #printPagestoXML(Page[], String, String)
* @see Page
*
*/
public void printPagestoXML(Page[] pages, String filepath){
//Converting a single input filepath into a filepath & filename and
//then running the method with the arguments
String newfilepath = "";
String[] splitpath = filepath.split("/");
for (int i = 0; i < splitpath.length - 1 ; i++){
newfilepath += (splitpath[i] + "/");
}
printPagestoXML(pages, newfilepath, splitpath[splitpath.length - 1].split("\\.")[0]);
}
/**
* Converts a collection of Pages into a XML String and then into a XML file.
*
* @param pages The collection or Pages, that shall be written into the file.
* @param filepath The path of the XML file.
* @param filename Name of the .xml file (Without .xml)
* @see #printPagestoXML(Page[], String, String)
* @see Page
*
*/
public void printPagestoXML(Page[] pages, String filepath, String filename){
try {
//Method starts of by creating a new outputfactory, that prints to a StringWriter,
//so that the xml String can still be transformed before getting output.
StringWriter rawXml = new StringWriter();
XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newFactory();
XMLStreamWriter xmlStreamWriter = xmlOutputFactory.createXMLStreamWriter(rawXml);
xmlStreamWriter.writeStartDocument("UTF-8", "1.0"); //start of the XML stream
xmlStreamWriter.writeStartElement("pages"); //the first element "pages"
for(int i = 0; i < pages.length; i++){
//loop to create elements for all pages in the collection
log.info("Creating Page " + i + ": " + pages[i].getTitle());
xmlStreamWriter.writeStartElement("page");
xmlStreamWriter.writeAttribute("pageID", pages[i].getArticleID() + "");
xmlStreamWriter.writeAttribute("namespaceID", pages[i].getNamespaceID() + "");
xmlStreamWriter.writeAttribute("title", pages[i].getTitle());
if (pages[i].getCategories() != null){
xmlStreamWriter.writeStartElement("categories");
for(int j = 0; j < pages[i].getCategories().size(); j++) {
//loop to create all categories for the currently creating page
log.trace("Creating Category " + j + ": " + pages[i].getCategories().toArray()[j].toString());
xmlStreamWriter.writeEmptyElement("category");
xmlStreamWriter.writeAttribute("name", pages[i].getCategories().toArray()[j].toString());
}
xmlStreamWriter.writeEndElement(); //end of categories
}
else {
// in case a page doesn't categories, the element wont be created and a warning is posted
log.info("Page " + (i + 1) + " does not have categories (" + pages[i].toString() + ")");
}
xmlStreamWriter.writeEndElement(); //end of page i
}
log.info("Last page written.");
xmlStreamWriter.writeEndElement(); //end of pages
xmlStreamWriter.writeEndDocument(); // end of document
xmlStreamWriter.flush();
xmlStreamWriter.close(); //close the streamwriter
/*
* The StringWriter variable rawXml now contains the whole XML string, but it still has to be
* transformed, otherwise it would all be printed in one line.
*/
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "yes"); //Setting the output properties
transformer.setOutputProperty(OutputKeys.INDENT, "yes"); //for the transformer
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
StreamResult streamResult = new StreamResult(new FileOutputStream(filepath + filename + ".xml"));
//initiation of the output streamresult with the filepath
transformer.transform(new StreamSource(new StringReader(rawXml.toString())), streamResult);
log.info(filename + ".xml created.");
//transformation / formatting of the xml string and output into .xml file
} catch (Exception e){
log.warn(e.getMessage());
}
}
答案 0 :(得分:3)
“&”字符
&
和左侧括号<
只有在用作标记分隔符或注释,处理指令或CDATA部分时才会以字面形式出现。如果在其他地方需要它们,则必须分别使用数字字符引用或字符串&
和<
对其进行转义。右尖括号
>
可以使用字符串>
表示,并且为了兼容性,必须使用>
或字符引用(当字符串{]]>
出现时进行转义) 1}}在内容中,当该字符串未标记CDATA部分的结尾时。
现在应该清楚,为什么它不像你期望的那样工作。
答案 1 :(得分:-1)
在build.gradle
中的依赖项中添加以下行编译'commons-lang:commons-lang:2.5'
对于unescape使用
$rootScope.myHTML = link;
或
String title = StringEscapeUtils.unescapeJava(.getTitle());
逃脱使用
String title = StringEscapeUtils.unescapeJava(userProfile.getScreen_name().replace("\n", "\\n")
.replace("&", "&"));
或
String title = StringEscapeUtils.escapeJava(xmlResponse.getTitle());