使用XMLWorker解析后,'div'标签内的图像不显示

时间:2016-10-25 11:10:50

标签: html image itext xmlworker

在使用xmlworker将HTML转换为PDF的过程中,我遇到了两个问题:

问题1:标题标签未按预期应用样式。例如,h1标签内文本的字体大小和粗细(font-weight)不受包裹它的外层标签的影响。其他标题标签(h1-h6)也是如此,尽管它们能被PDF识别并加入书签。

问题2:如果图片位于div标签内,则不会显示。我想为解析出的图像设置alignment(对齐)属性。但是,当我在ImageProvider中手动设置时,对齐效果不会反映在PDF文档中;而当我创建自己的TagProcessor时,位于div内部的图像不会显示。当我将父标签从div改为p(段落)时,图像显示正常,对齐(包括文字环绕 textwrap)也能正常工作。以下是我的代码。

/**
 * Command-line entry point that converts {@code page02.html} into a timestamped PDF file
 * by chaining a CSS resolver pipeline, an HTML pipeline and a PDF writer pipeline.
 */
public class PDFCreator {

    /**
     * Runs the conversion; any failure is reported by printing its stack trace.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        try {
            PDFCreator.generatePDF();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses {@code page02.html} from the working directory and writes the result to
     * {@code V.<yyyyMMdd_HHmmss>.pdf}.
     *
     * <p>Both file streams are managed by try-with-resources so their handles are released
     * even when parsing fails part-way through.
     *
     * @throws DocumentException if the PDF document cannot be set up or written
     * @throws IOException       if the HTML input or the PDF output cannot be read/written
     */
    private static void generatePDF() throws DocumentException,
            FileNotFoundException, BadElementException, MalformedURLException,
            IOException {

        String outputName = "V."
                + new SimpleDateFormat("yyyyMMdd_HHmmss").format(Calendar.getInstance().getTime())
                + ".pdf";

        // Open the input first: if the HTML file is missing we fail before an empty
        // output file has been created on disk.
        try (FileInputStream htmlInput = new FileInputStream("page02.html");
             OutputStream output = new FileOutputStream(outputName)) {

            // step 1: A3 page with explicit margins
            Document document = new Document(PageSize.A3, 30, 30, 60, 100);

            // step 2: bind the document to the output stream
            PdfWriter writer = PdfWriter.getInstance(document, output);
            writer.setTagged();
            document.open();

            // CSS: only inline style attributes are used; the external sheet is empty.
            CSSResolver cssResolver = new StyleAttrCSSResolver();
            CssFile cssFile = XMLWorkerHelper.getCSS(new ByteArrayInputStream(""
                    .getBytes()));
            cssResolver.addCss(cssFile);

            // HTML: start from the stock tag processors, then swap in the custom <img> handler.
            MyHtmlPipelineContext htmlContext = new MyHtmlPipelineContext();
            TagProcessorFactory factory = Tags.getHtmlTagProcessorFactory();
            factory.removeProcessor(HTML.Tag.IMG);
            factory.addProcessor(new ImageTagProcessor(), HTML.Tag.IMG);
            htmlContext.setTagFactory(factory);
            htmlContext.setImageProvider(new Base64ImageProvider());

            // Pipelines are built back to front: css -> html -> pdf
            PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
            HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
            CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);

            // XML Worker
            XMLWorker worker = new XMLWorker(css, true);
            XMLParser parser = new XMLParser(worker);
            parser.parse(htmlInput);

            // step 5: finish the document before the output stream is released
            document.close();
        }
    }

}

/**
 * HTML pipeline context whose clones always carry a {@link Base64ImageProvider}.
 */
public class MyHtmlPipelineContext extends HtmlPipelineContext {

    /**
     * Creates the context by passing {@code null} to the parent constructor.
     * NOTE(review): presumably the parent falls back to its defaults for a null argument — confirm.
     */
    public MyHtmlPipelineContext() {
        super(null);
    }

    /**
     * Clones the context via the parent implementation and installs a fresh
     * {@link Base64ImageProvider} on the copy.
     *
     * @return the cloned context, never {@code null}
     * @throws IllegalStateException if the parent implementation fails to clone with a
     *         checked exception; the original exception is kept as the cause
     */
    public HtmlPipelineContext clone() {
        HtmlPipelineContext ctx;
        try {
            ctx = super.clone();
        } catch (RuntimeException e) {
            // Programming errors pass through untouched.
            throw e;
        } catch (Exception e) {
            // Returning null here would only defer the failure to an unrelated
            // NullPointerException in the caller, so fail fast and keep the cause.
            throw new IllegalStateException("Unable to clone the HTML pipeline context", e);
        }
        ctx.setImageProvider(new Base64ImageProvider());
        return ctx;
    }

}



/**
 * Replacement {@code <img>} tag processor that builds the image itself so that the CSS
 * {@code float} property can be mapped onto an image alignment with text wrapping.
 */
public class ImageTagProcessor extends com.itextpdf.tool.xml.html.Image {

    /**
     * Produces the content for a closing {@code <img>} tag.
     *
     * @param ctx            the worker context
     * @param tag            the {@code <img>} tag being closed
     * @param currentContent content collected inside the tag (not used)
     * @return a list holding the image, or an empty list when the image could not be loaded
     */
    public List<Element> end(final WorkerContext ctx, final Tag tag, final List<Element> currentContent) {
        List<Element> list = new ArrayList<Element>(1);
        Image image = getImageObject(ctx, tag);
        // Never hand a null element to the parent tag processors.
        if (image != null) {
            list.add(image);
        }
        return list;
    }

    /**
     * Builds an image from the tag's {@code src} attribute, which is either a base64
     * {@code data:} URI or a location understood by {@code Image.getInstance(String)}.
     *
     * <p>A CSS {@code float} of {@code left} or {@code right} becomes the matching alignment
     * combined with {@code TEXTWRAP}. The image is scaled only when both the {@code width}
     * and {@code height} attributes are positive integers.
     *
     * @param ctx the worker context (not used)
     * @param tag the {@code <img>} tag to read attributes and CSS from
     * @return the configured image, or {@code null} when {@code src} is missing or the
     *         image cannot be loaded
     */
    public static Image getImageObject(WorkerContext ctx, Tag tag) {
        Map<String, String> tagAttributes = tag.getAttributes();
        Map<String, String> tagCss = tag.getCSS();

        String src = tagAttributes.get("src");
        if (src == null) {
            // An <img> without src has nothing to load.
            return null;
        }

        Image imgObj;
        try {
            int pos = src.indexOf("base64,");
            if (src.startsWith("data") && pos > 0) {
                // Skip the 7 characters of "base64," to reach the encoded payload.
                byte[] img = Base64.decode(src.substring(pos + 7));
                imgObj = Image.getInstance(img);
            } else {
                imgObj = Image.getInstance(src);
            }
        } catch (BadElementException | IOException ex) {
            return null;
        }

        String floatValue = tagCss.get("float");
        if (floatValue != null) {
            if (floatValue.equalsIgnoreCase("right")) {
                imgObj.setAlignment(Image.RIGHT | Image.TEXTWRAP);
            } else if (floatValue.equalsIgnoreCase("left")) {
                imgObj.setAlignment(Image.LEFT | Image.TEXTWRAP);
            }
        }

        int width = parseDimension(tagAttributes.get("width"));
        int height = parseDimension(tagAttributes.get("height"));
        if (width > 0 && height > 0) {
            imgObj.scaleAbsolute((float) width, (float) height);
        }
        return imgObj;
    }

    /** Parses an integer dimension attribute; returns 0 when it is absent, blank or not an integer. */
    private static int parseDimension(String value) {
        if (value == null) {
            return 0;
        }
        String trimmed = value.trim();
        if (trimmed.length() == 0) {
            return 0;
        }
        try {
            return Integer.parseInt(trimmed);
        } catch (NumberFormatException ignored) {
            // Non-integer values (e.g. "50%") are treated as "no explicit size".
            return 0;
        }
    }

}


/**
 * Image provider that resolves both base64 {@code data:} URIs and ordinary image locations.
 */
public class Base64ImageProvider extends AbstractImageProvider {

    /**
     * Loads the image referenced by {@code src} and stores it via the parent provider.
     *
     * @param src a {@code data:...base64,...} URI or a location accepted by
     *            {@code Image.getInstance(String)}; may be {@code null}
     * @return the loaded image, or {@code null} when {@code src} is {@code null} or the
     *         image cannot be loaded
     */
    public Image retrieve(String src) {
        if (src == null) {
            // Nothing to resolve; report "not found" instead of failing on indexOf.
            return null;
        }
        int pos = src.indexOf("base64,");
        try {
            Image imgObj;
            if (src.startsWith("data") && pos > 0) {
                // Skip the 7 characters of "base64," to reach the encoded payload.
                byte[] img = Base64.decode(src.substring(pos + 7));
                imgObj = Image.getInstance(img);
            } else {
                imgObj = Image.getInstance(src);
            }
            super.store(src, imgObj);
            return imgObj;
        } catch (BadElementException | IOException ex) {
            return null;
        }
    }

    /**
     * @return always {@code null}; this provider defines no image root path
     */
    public String getImageRootPath() {
        return null;
    }
}

page02.html

<html>
<body ><h1>hello</h1>
<!-- Test case: an image wrapped in a div. The wrapper is closed with </div> so the markup is well-formed. -->
<div style="font-size: medium;">
<img align="right"
src="path"
style="width: 267px; height: 200px; float: left;"  />
</div>
</body>
</html>

我是否遗漏了某些配置?为什么图像位于段落标签内时能正确显示,而位于div标签内时却不能?我应该修改代码的哪个部分才能使其正常工作?注意:如果我使用默认的TagProcessor,图像可以正确显示,但没有对齐和文字环绕效果。

0 个答案:

没有答案