我必须在odt文件中更新文件content.xml。 我想获取相同的文件内容,没有元素空格和没有换行符。
我尝试在Java中使用带有Transformer对象的xsl转换,它部分有效。 例如,有一个简单的odt,content.xml如下:
<?xml version="1.0" encoding="utf-8" ?>
<office:document-content xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" office:version="1.2">
<office:font-face-decls>
<style:font-face style:name="Courier New" style:font-family-generic="modern" style:font-pitch="fixed" svg:font-family="'Courier New'" />
</office:font-face-decls>
<office:automatic-styles>
</office:automatic-styles>
<office:body>
<office:text>
<text:p text:style-name="Title">TODO supply a title</text:p>
<text:p text:style-name="Text_20_body">TODO write content</text:p>
<text:h text:style-name="Heading_20_1" text:outline-level="1">My First Heading</text:h>
<text:p text:style-name="First_20_paragraph">My first paragraph.</text:p>
</office:text>
</office:body>
</office:document-content>
我写了这个xsl文件来删除所有换行符,元素之间的空格,所以我想在一行中获得相同的XML内容序列化。
<?xml version="1.0" encoding="ISO-8859-15" ?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xalan="http://xml.apache.org/xslt"
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0"
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0"
xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"
xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0"
xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
xmlns:math="http://www.w3.org/1998/Math/MathML"
xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0"
xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0"
xmlns:ooo="http://openoffice.org/2004/office"
xmlns:ooow="http://openoffice.org/2004/writer"
xmlns:oooc="http://openoffice.org/2004/calc"
xmlns:dom="http://www.w3.org/2001/xml-events"
xmlns:xforms="http://www.w3.org/2002/xforms"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
extension-element-prefixes="office style text"
>
<xsl:output method="xml" encoding="ISO-8859-15" indent="no"/>
<xsl:strip-space elements="*" />
<xsl:template match="@*|node()|comment()|processing-instruction()|text()">
<xsl:copy>
<xsl:apply-templates select="@*|node()|comment()|processing-instruction()|text()" />
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
我使用了缩进=&#34;没有&#34;和条形空间元素=&#34; *&#34;没有标识,但现在我的问题是名称空间。 我使用这个xsl获得的XML如下:
<?xml version="1.0" encoding="ISO-8859-15"?><document-content xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" office:version="1.2"><font-face-decls><font-face style:font-family-generic="modern" style:font-pitch="fixed" style:name="Courier New" svg:font-family="'Courier New'"/></font-face-decls><automatic-styles/><body><text><p text:style-name="Title">TODO supply a title</p><p text:style-name="Text_20_body">TODO write content</p><h text:outline-level="1" text:style-name="Heading_20_1">My First Heading</h><p text:style-name="First_20_paragraph">My first paragraph.</p></text></body></document-content>
它按照我的意愿排在一行,但元素没有名称空间前缀。 请注意,对于具有正确名称空间的属性,它不会发生。
在我指定匹配节点,注释和属性的xsl中,<xsl:stylesheet>
元素中的命名空间和传递允许命名空间列表的extension-element-prefixs。
如果我删除了extension-element-prefix,则没有任何改变。
使用Transformer对象的Java代码如下:
public void serializeXML(String filePath, String destinationPath) throws Exception {
File xmlFile = new File(filePath);
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(xmlFile);
doc.normalizeDocument();
DOMSource domSource = new DOMSource(doc);
TransformerFactory transformerFactory = TransformerFactory.newInstance();
InputStream is = XMLSerializer.class.getClassLoader().getResourceAsStream("identer.xsl");
Transformer transformer = transformerFactory.newTransformer(new StreamSource(is));
StringWriter sw = new StringWriter();
StreamResult sr = new StreamResult(sw);
transformer.transform(domSource, sr);
Result result = new StreamResult(new FileOutputStream(new File(destinationPath)));
transformer.transform(domSource, result);
LOGGER.info(sw.toString());
}
我声明了Transformer对象,并向其传递了xsl文件的inputStream。 我没有设置任何属性,因为我在xsl文件中声明了所有(在我看来)是必要的,之后我转换文档,将结果保存到文件中并用记录器打印。
有人可以帮助我吗?
谢谢。
答案 0 :(得分:0)
我解决了,感谢回答我问题的用户。
我修改了我的代码,所以xsl现在是:
<xsl:output method="xml" encoding="ISO-8859-15" indent="no"/>
<xsl:strip-space elements="*" />
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
然后我简化了使用Transformer的Java代码:
public void serializeXML(String filePath, String destinationPath) throws Exception {
TransformerFactory factory = TransformerFactory.newInstance();
InputStream is = XMLSerializer.class.getClassLoader().getResourceAsStream("identer.xsl");
Source xslt = new StreamSource(is);
Transformer transformer = factory.newTransformer(xslt);
Source text = new StreamSource(new File(filePath));
transformer.transform(text, new StreamResult(new File(destinationPath)));
}