使用docx4j将HTML代码转换为Word时,图像未嵌入Word文档中

时间:2014-12-26 07:44:15

标签: html image jpeg docx docx4j

示例程序......

import java.io.IOException;
import org.docx4j.Docx4jProperties;
import org.docx4j.jaxb.Context;
import org.docx4j.openpackaging.contenttype.ContentType;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.model.structure.PageSizePaper;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.PartName;
import org.docx4j.openpackaging.parts.WordprocessingML.AlternativeFormatInputPart;
import org.docx4j.relationships.Relationship;
import org.docx4j.wml.CTAltChunk;

public class HtmlToDoc {
    public static void main(String[] args) throws Docx4JException {
        String html="", s="", filepath="E://HtmlToDoc//";

        try {

            String html = "<html><head><title>Import me</title></head><body><p>Hello World! Sample Program</p><img src="E:/HtmlToDoc/LOGO.JPEG"/></body></html>";

            Docx4jProperties.getProperties().setProperty("docx4j.PageSize", "B4JIS");
            String papersize= Docx4jProperties.getProperties().getProperty("docx4j.PageSize", "B4JIS");
            String landscapeString = Docx4jProperties.getProperties().getProperty("docx4j.PageOrientationLandscape", "true");
            boolean landscape= Boolean.parseBoolean(landscapeString);

            WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.createPackage(PageSizePaper.valueOf(papersize), landscape);
            AlternativeFormatInputPart afiPart = new AlternativeFormatInputPart(new PartName("/hw.html"));

            afiPart.setBinaryData(html.getBytes());
            //afiPart.setBinaryData(fileContent);

            afiPart.setContentType(new ContentType("text/html"));
            Relationship altChunkRel = wordMLPackage.getMainDocumentPart().addTargetPart(afiPart);

            // .. the bit in document body
            CTAltChunk ac = Context.getWmlObjectFactory().createCTAltChunk();
            ac.setId(altChunkRel.getId() );
            wordMLPackage.getMainDocumentPart().addObject(ac);

            // .. content type
            wordMLPackage.getContentTypeManager().addDefaultContentType("html", "text/html");
            wordMLPackage.save(new java.io.File("E://HtmlToDoc//" + "test.docx"));

        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } 
    }

}

这段代码在我的本地计算机上运行正常。但当我把它移到服务器上后,图像没有嵌入Word文档中,尽管我给出了正确的图像路径[在服务器上将HTML转换为PDF时,相同的图像路径工作正常]。在服务器[Linux机器、IBM WebSphere应用服务器和Apache Web服务器]上运行时,图像丢失的原因可能是什么?我的所有路径(Word文档、图像、HTML文档)都是相同的。

1 个答案:

答案 0 :(得分:1)

您的代码依赖Word来转换altChunk中的HTML,因此,如果您在本地计算机上打开该Word文档,Word将无法访问服务器上的E:/HtmlToDoc/LOGO.JPEG图像。

您可以使用网址或数据URI。

或者,使用docx4j-ImportXHTML,它会自行完成转换,而无需依赖Word。