如何使用mupdf从pdf中提取图像?

时间:2013-09-26 09:04:12

标签: visual-c++ pdf mupdf

我想从pdf中提取图像并将图像句柄保存到std :: vector,有时候背景不正确,我的代码就跟着了。

    BOOL CTextEditorDoc::loadImage()
{
    if(m_strPDFPath.IsEmpty())
        return FALSE;

    CString strFile;
    fz_context *ctx;
    fz_document* doc;

    fz_device *dev;

    fz_irect bbox;
    fz_rect bounds;
    fz_matrix ctm;
    fz_pixmap *image;
    fz_colorspace *colorspace;

    int i,j,rotation = 0;
    int pagecount = 0;
    fz_page *page;

    BITMAPINFO bmi;
    HBITMAP hBitmap;
    LPBYTE pDest,pImage;

    if(!gb2312toutf8(m_strPDFPath,strFile))
        return FALSE;

    ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
    fz_try(ctx){
        doc = fz_open_document(ctx,strFile.GetBuffer(0));
    }fz_catch(ctx){
        fz_free_context(ctx);
        return FALSE;
    }

    fz_rotate(&ctm, rotation);
    colorspace = fz_device_rgb(ctx);
    pagecount = fz_count_pages(doc);

    pDest = NULL;
    ::ZeroMemory(&bmi, sizeof(BITMAPINFO));
    bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
    bmi.bmiHeader.biCompression = BI_RGB;
    bmi.bmiHeader.biPlanes = 1;
    bmi.bmiHeader.biBitCount = 32;
    bmi.bmiHeader.biWidth = 180;
    bmi.bmiHeader.biHeight = -180;
    bmi.bmiHeader.biSizeImage = 180*180*4;

    for(i=0;i<pagecount;i++){
        page = fz_load_page(doc,i);

        if(i == 0){
            fz_bound_page(doc,page,&bounds);
            fz_pre_scale(&ctm,180/(bounds.x1 - bounds.x0),180/(bounds.y1 - bounds.y0));
            fz_transform_rect(&bounds, &ctm);
            fz_round_rect(&bbox, &bounds);
        }

        image = fz_new_pixmap_with_bbox(ctx,colorspace,&bbox);
        dev = fz_new_draw_device(ctx,image);

        fz_try(ctx){
            fz_run_page(doc,page,dev,&ctm,NULL);
        }fz_catch(ctx){
            fz_drop_pixmap(ctx,image);
            fz_free_device(dev);
            fz_free_page(doc, page);
            continue;
        }

        pImage = image->samples;
        if(pImage){
            pDest = NULL;
            hBitmap = ::CreateDIBSection(NULL,&bmi,DIB_RGB_COLORS,(void**)&pDest,NULL,0);
            ASSERT(hBitmap);

            if(image->n == 2){ //not pallet
                for (j = 180* 180; j > 0 ; j--){
                    pDest[0] = pDest[1] = pDest[2] = *pImage++;
                    pDest[3] = *pImage++;
                    pDest += 4;
                }
            }else if(image->n == 4){
                //memcpy(pDest,pImage,m_thumbWidth * m_thumbHeight*4);
                for (j = 180* 180; j > 0 ; j--){
                    pDest[0] = *pImage++;
                    pDest[1] = *pImage++;
                    pDest[2] = *pImage++;
                    pDest[3] = *pImage++;
                    pDest += 4;
                }
            }else ASSERT(FALSE);

            m_imageVector.push_back(hBitmap);// save it to std::vector
        }

        fz_drop_pixmap(ctx,image);
        fz_free_device(dev);
        fz_free_page(doc, page);
    }

    fz_close_document(doc);
    fz_free_context(ctx);
    return TRUE;
}

这段代码可以提取pdf的所有图像,但它可能太慢了,如何改进呢? 有时候图像的背景不正确吗?

跟随图片的左边是不正确的,右图是真实的。

screentshot http://s22.postimg.org/bsdgn57ml/result.jpg

1 个答案:

答案 0 :(得分:2)

http://mupdf.com/docs/example.c

的示例相同 你忘了

fz_clear_pixmap_with_value(ctx,pix,0xff);

白色像素图