iText / Flying Saucer 无法将包含 CJK 扩展 B 区字符的 XHTML 转换为 PDF

时间:2015-08-06 09:23:34

标签: pdf xhtml pdf-generation itext flying-saucer

我正在尝试使用 iText 或 Flying Saucer 将 XHTML 转换为 PDF。XHTML 文件中通常包含 CJK 扩展 B 区(日语)字符。生成的 PDF 能正常显示所有英文字符,但 CJK 扩展 B 区(日语)字符所在的位置显示为空白。

1 个答案:

答案 0 :(得分:0)

“”和“”(原文引号内的两个示例字符在转码时丢失)是 CJK 扩展 B 区中的汉字。

import com.lowagie.text.Document;
import com.lowagie.text.Font;
import com.lowagie.text.Paragraph;
import com.lowagie.text.Phrase;
import com.lowagie.text.pdf.BaseFont;
import com.lowagie.text.pdf.PdfWriter;

import java.awt.Desktop;
import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Demo that writes a one-paragraph PDF in which CJK Unified Ideographs
 * Extension B characters are rendered with a dedicated font while all other
 * text uses a regular Chinese font.
 *
 * <p>The input string is split into consecutive runs of "Extension B" and
 * "everything else" code points; each run becomes its own {@link Phrase} with
 * the matching font.
 *
 * <p>The font paths are built from the {@code WINDIR} environment variable, so
 * the demo as written only works on Windows with the MingLiU fonts installed.
 */
public class TestPdf {

    /** Output path used by {@link #main(String[])}. */
    static final String DEST = "\\result.pdf";

    /**
     * Font used for everything that is not in Extension B. The {@code ,1}
     * suffix selects face index 1 inside the {@code .ttc} collection
     * (iText's {@code name.ttc,index} syntax).
     */
    static final String CHI_FONT = System.getenv("WINDIR") + "\\Fonts\\mingliu.ttc,1";

    /** Font used for CJK Extension B characters (same {@code ,index} syntax). */
    static final String CHI_EXTB_FONT = System.getenv("WINDIR") + "\\Fonts\\mingliub.ttc,1";

    /** Leading passed to the paragraph and to every phrase added to it. */
    private static final float LEADING = 10;

    /** A maximal run of consecutive characters that share the same font. */
    private static final class TextRun {
        /** The characters of the run; never empty. */
        final String text;
        /** {@code true} when every code point of the run is in Extension B. */
        final boolean extensionB;

        TextRun(String text, boolean extensionB) {
            this.text = text;
            this.extensionB = extensionB;
        }
    }

    /**
     * Tells whether a code point belongs to the CJK Unified Ideographs
     * Extension B block.
     *
     * @param codePoint the Unicode code point to classify
     * @return {@code true} if the code point is in Extension B
     */
    private static boolean isCJKExtensionB(int codePoint) {
        return Character.UnicodeBlock.of(codePoint)
                == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B;
    }

    /**
     * Splits a string into consecutive runs, alternating between Extension B
     * text and all other text, in their original order.
     *
     * @param str the text to split; may be {@code null}
     * @return the runs in order; empty when {@code str} is {@code null} or
     *         empty. No run has empty text.
     */
    private static List<TextRun> getChunks(String str) {
        List<TextRun> runs = new ArrayList<>();
        if (str == null || str.isEmpty()) {
            return runs;
        }

        final int length = str.length();
        int runStart = 0;
        boolean runIsExtB = isCJKExtensionB(str.codePointAt(0));

        // Walk by code point, not by char: Extension B lives outside the BMP,
        // so each of its characters occupies a surrogate pair.
        int offset = 0;
        while (offset < length) {
            final int codePoint = str.codePointAt(offset);
            final boolean extB = isCJKExtensionB(codePoint);
            if (extB != runIsExtB) {
                // Classification flipped: close the current run here.
                runs.add(new TextRun(str.substring(runStart, offset), runIsExtB));
                runStart = offset;
                runIsExtB = extB;
            }
            offset += Character.charCount(codePoint);
        }
        runs.add(new TextRun(str.substring(runStart), runIsExtB));
        return runs;
    }

    /**
     * Writes {@code text} as a single paragraph into a new PDF file.
     *
     * @param targetFile the file to create or overwrite
     * @param text the text to render; Extension B characters are set in
     *        {@link #CHI_EXTB_FONT}, everything else in {@link #CHI_FONT}
     * @throws Exception if a font cannot be loaded or the PDF cannot be written
     */
    public void createPdf(File targetFile, String text) throws Exception {
        // Load the fonts before touching the output file, so that a missing
        // font does not leave an empty file behind.
        Font f = new Font(BaseFont.createFont(CHI_FONT, BaseFont.IDENTITY_H, BaseFont.EMBEDDED));
        Font fontExtB = new Font(BaseFont.createFont(CHI_EXTB_FONT, BaseFont.IDENTITY_H, BaseFont.EMBEDDED));

        Paragraph pz = new Paragraph(LEADING, "", f);
        for (TextRun run : getChunks(text)) {
            pz.add(new Phrase(LEADING, run.text, run.extensionB ? fontExtB : f));
        }
        pz.setKeepTogether(true);

        // try-with-resources guarantees the file handle is released even when
        // writing the document fails part-way.
        try (FileOutputStream out = new FileOutputStream(targetFile)) {
            Document document = new Document();
            PdfWriter.getInstance(document, out);
            document.open();
            document.add(pz);
            document.close();
        }
    }

    /**
     * Generates the sample PDF at {@link #DEST} and tries to open it with the
     * desktop's default PDF viewer.
     *
     * <p>NOTE(review): the sample text below contains only BMP characters, so
     * the Extension B font path is not exercised; append a supplementary-plane
     * ideograph such as {@code "\uD840\uDC00"} (U+20000) to try it.
     *
     * @param args unused
     * @throws Exception if the PDF cannot be created
     */
    public static void main(String[] args) throws Exception {
        File outputFile = new File(DEST);
        new TestPdf().createPdf(outputFile, "明月場");
        if (Desktop.isDesktopSupported()) {
            try {
                Desktop.getDesktop().open(outputFile);
            } catch (Exception ignored) {
                // Best effort only: no application registered for PDFs.
            }
        }
    }
}