如何使用 PDFBox(Java)提取字体颜色?

时间:2013-04-19 13:04:53

标签: java pdfbox

我需要提取每个字符的字体颜色。我在论坛上找到了下面这段代码,但执行时会抛出如下错误:

Apr 19, 2013 6:23:45 PM org.apache.pdfbox.util.operator.pagedrawer.FillNonZeroRule process
WARNING: java.lang.ClassCastException: org.apache.pdfbox.util.PDFStreamEngine cannot be cast to org.apache.pdfbox.pdfviewer.PageDrawer
java.lang.ClassCastException: org.apache.pdfbox.util.PDFStreamEngine cannot be cast to org.apache.pdfbox.pdfviewer.PageDrawer



// Loads a PDF, runs the content stream of its first page through a
// PDFStreamEngine configured with the PageDrawer operator table, and prints
// the stroking color space name followed by each color component scaled by 255.
//
// NOTE(review): the reported ClassCastException says a PDFStreamEngine cannot
// be cast to PageDrawer, which suggests the operators listed in
// PageDrawer.properties expect the engine to be a PageDrawer instance.
PDDocument doc = null;
try {
    doc = PDDocument.load("C:/Path/To/Pdf/Sample.pdf");
    PDFStreamEngine engine = new PDFStreamEngine(ResourceLoader.loadProperties("org/apache/pdfbox/resources/PageDrawer.properties"));
    // Only the first page of the document is processed.
    PDPage page = (PDPage)doc.getDocumentCatalog().getAllPages().get(0);
    engine.processStream(page, page.findResources(), page.getContents().getStream());
    // The graphics state is read once, after the whole stream has been
    // processed, so this yields a single color rather than one per character.
    PDGraphicsState graphicState = engine.getGraphicsState();
    // NOTE(review): this reads the stroking color; confirm whether the
    // non-stroking (fill) color is the one actually wanted for text.
    System.out.println(graphicState.getStrokingColor().getColorSpace().getName());
    float colorSpaceValues[] = graphicState.getStrokingColor().getColorSpaceValue();
    for (float c : colorSpaceValues) {
        System.out.println(c * 255);
    }
}
finally {
    // Always release the document, even when processing fails.
    if (doc != null) {
        doc.close();
    }
}

有人能帮帮我吗?谢谢。

1 个答案:

答案 0 :(得分:0)

请查看 org.apache.pdfbox.pdfviewer.PageDrawer 中的以下方法

/**
 * Draws a single text position on the AWT graphics context.
 *
 * The composite and paint are chosen from the current graphics state
 * according to the text rendering mode: the non-stroking color for fill
 * mode, the stroking color for stroke mode, and a fully transparent color
 * for the "invisible" mode. Any other mode is logged and handled like fill
 * mode. The text matrix is then converted to an AWT transform and the font
 * draws the string.
 *
 * An IOException raised while resolving the paint or drawing is logged and
 * not propagated.
 *
 * @param text the text position to draw
 */
protected void processTextPosition( TextPosition text )
{
    try
    {
        PDGraphicsState graphicsState = getGraphicsState();
        Composite composite;
        Paint paint;
        switch(graphicsState.getTextState().getRenderingMode()) 
        {
            case PDTextState.RENDERING_MODE_FILL_TEXT:
                composite = graphicsState.getNonStrokeJavaComposite();
                paint = graphicsState.getNonStrokingColor().getJavaColor();
                if (paint == null)
                {
                    // no plain java color available, ask the color state for a paint instead
                    paint = graphicsState.getNonStrokingColor().getPaint(pageSize.height);
                }
                break;
            case PDTextState.RENDERING_MODE_STROKE_TEXT:
                composite = graphicsState.getStrokeJavaComposite();
                paint = graphicsState.getStrokingColor().getJavaColor();
                if (paint == null)
                {
                    // no plain java color available, ask the color state for a paint instead
                    paint = graphicsState.getStrokingColor().getPaint(pageSize.height);
                }
                break;
            case PDTextState.RENDERING_MODE_NEITHER_FILL_NOR_STROKE_TEXT:
                //basic support for text rendering mode "invisible"
                Color nsc = graphicsState.getStrokingColor().getJavaColor();
                if (nsc == null)
                {
                    // getJavaColor() may return null (see the checks above); fall back to
                    // a fully transparent black instead of failing with a NullPointerException
                    paint = new Color(0f, 0f, 0f, 0f);
                }
                else
                {
                    float[] components = {Color.black.getRed(),Color.black.getGreen(),Color.black.getBlue()};
                    // alpha 0 keeps the text invisible
                    paint = new Color(nsc.getColorSpace(),components,0f);
                }
                composite = graphicsState.getStrokeJavaComposite();
                break;
            default:
                // TODO : need to implement....
                LOG.debug("Unsupported RenderingMode "
                        + this.getGraphicsState().getTextState().getRenderingMode()
                        + " in PageDrawer.processTextPosition()."
                        + " Using RenderingMode "
                        + PDTextState.RENDERING_MODE_FILL_TEXT
                        + " instead");
                composite = graphicsState.getNonStrokeJavaComposite();
                paint = graphicsState.getNonStrokingColor().getJavaColor();
                if (paint == null)
                {
                    // same fallback as the fill mode this branch stands in for
                    paint = graphicsState.getNonStrokingColor().getPaint(pageSize.height);
                }
        }
        graphics.setComposite(composite);
        graphics.setPaint(paint);

        PDFont font = text.getFont();
        // work on a copy so the matrix held by the text position is not modified
        Matrix textPos = text.getTextPos().copy();
        float x = textPos.getXPosition();
        // the 0,0-reference has to be moved from the lower left (PDF) to the upper left (AWT-graphics)
        float y = pageSize.height - textPos.getYPosition();
        // Set translation to 0,0. We only need the scaling and shearing
        textPos.setValue(2, 0, 0);
        textPos.setValue(2, 1, 0);
        // because of the moved 0,0-reference, we have to shear in the opposite direction
        textPos.setValue(0, 1, (-1)*textPos.getValue(0, 1));
        textPos.setValue(1, 0, (-1)*textPos.getValue(1, 0));
        AffineTransform at = textPos.createAffineTransform();
        PDMatrix fontMatrix = font.getFontMatrix();
        at.scale(fontMatrix.getValue(0, 0) * 1000f, fontMatrix.getValue(1, 1) * 1000f);
        //TODO setClip() is a massive performance hot spot. Investigate optimization possibilities
        graphics.setClip(graphicsState.getCurrentClippingPath());
        // the fontSize is no longer needed as it is already part of the transformation
        // we should remove it from the parameter list in the long run
        font.drawString( text.getCharacter(), text.getCodePoints(), graphics, 1, at, x, y );
    }
    catch( IOException io )
    {
        // report through the class logger rather than dumping to stderr
        LOG.error("Error while drawing text in PageDrawer.processTextPosition()", io);
    }
}

这段代码展示了如何提取颜色和其他属性。