我正在尝试将HTML转换为PDF。HTML中的一些符号没有出现在生成的PDF文档中。调试时我发现,字符 &#8209;(即 &#x2011;,不换行连字符 U+2011)在iText库自带的任何默认字体中都不存在。我的HTML的font-family中没有列出Arial Unicode MS,因此即使我把该字体添加到iText渲染器中,它也不会被使用。所以我想知道:在生成PDF的过程中,能否使用一种HTML中没有提到的字体来渲染这些字符? 示例HTML:
<html lang="en">
<title>Sample HTML</title>
<body>
<!-- The phone number below is written with U+2011 (non-breaking hyphen), not the ASCII hyphen-minus. -->
<div style="font-family:Arial, Helvetica, sans-serif; font-size:14px; line-height:23px; color:#3F3F3F; font-weight:normal; ">Please call this
number in case of emergency, 123‑456‑7890.
</div>
</body>
</html>
Java代码:
/**
 * Converts an HTML document into a PDF and returns the PDF as a Base64 string.
 *
 * <p>The pipeline is: decode the input bytes, normalise the markup to XHTML,
 * parse the XHTML into a DOM with the injected {@code Tidy}, then lay out and
 * render it with the injected {@code ITextRenderer}.
 *
 * <p>NOTE(review): the renderer is an injected, shared instance and a font is
 * registered on it on every call; confirm the bean scope is appropriate if
 * this method can be invoked concurrently.
 */
@Component
public class PDFConverter {
    @Autowired
    private Tidy tidy;
    @Autowired
    private ITextRenderer renderer;
    @Autowired
    private Gson gson;

    /**
     * Renders the given HTML as a PDF.
     *
     * @param bytes the HTML document, interpreted as UTF-8 encoded text
     * @return the generated PDF encoded as Base64, or {@code null} if the
     *         conversion failed (the failure is printed to standard error)
     */
    public String pdfBox(byte[] bytes) {
        String pdf = null;
        try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
            // Decode with an explicit charset; the platform default can corrupt
            // non-ASCII characters such as U+2011 on hosts that are not UTF-8.
            String html = new String(bytes, java.nio.charset.StandardCharsets.UTF_8);
            String xhtml = toXHTML(html);

            ITextFontResolver resolver = renderer.getFontResolver();
            Document doc;
            try (InputStream is = new ByteArrayInputStream(
                    xhtml.getBytes(java.nio.charset.StandardCharsets.UTF_8))) {
                doc = tidy.parseDOM(is, null);
            }
            renderer.setDocument(doc, null);

            // NOTE(review): registering the font only makes it available; it is
            // presumably used only when the document's CSS font-family names it.
            // Confirm against the renderer's font-selection rules.
            resolver.addFont("*filepath*/ARIALUNI.TTF", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);

            renderer.layout();
            // createPDF(OutputStream) also finishes the document, so no separate
            // finishPDF() call is needed before reading the bytes.
            renderer.createPDF(outputStream);

            pdf = Base64.getEncoder().encodeToString(outputStream.toByteArray());
        } catch (DocumentException | IOException e) {
            // Kept as best-effort: callers receive null when conversion fails.
            e.printStackTrace();
        }
        return pdf;
    }

    /**
     * Re-serialises arbitrary HTML as XML-syntax markup restricted to ASCII.
     *
     * <p>Characters outside ASCII are written as escapes because the output
     * charset is set to ASCII.
     *
     * @param html the HTML source text
     * @return the document serialised with XML syntax and no pretty-printing
     */
    private String toXHTML(String html) {
        // The two-argument Jsoup.parse takes a base URI, not a charset, so the
        // single-argument overload is the correct one for an in-memory string.
        final org.jsoup.nodes.Document document = org.jsoup.Jsoup.parse(html);
        document.outputSettings()
                .syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml)
                .prettyPrint(false)
                .charset("ASCII")
                .escapeMode(EscapeMode.xhtml);
        return document.html();
    }
}