尝试使用C#.NET和PDFBox 1.7.0从现有PDF中提取嵌入式文件附件

时间:2013-12-23 22:47:57

标签: c# .net pdf pdfbox ikvm

我正在尝试使用C#.NET和PDFBox从现有PDF中提取嵌入式文件附件。 以下是我的代码:

using System.Collections.Generic;
using System.IO;
using java.util;                                            // IKVM Java for Microsoft .NET  http://www.ikvm.net  
using java.io;                                              // IKVM Java for Microsoft .NET  http://www.ikvm.net
using org.apache.pdfbox.pdmodel;                            // PDFBox 1.7.0 http://pdfbox.apache.org
using org.apache.pdfbox.pdmodel.common;                     // PDFBox 1.7.0 http://pdfbox.apache.org
using org.apache.pdfbox.pdmodel.common.filespecification;   // PDFBox 1.7.0 http://pdfbox.apache.org
using org.apache.pdfbox.cos;                                // PDFBox 1.7.0 http://pdfbox.apache.org

namespace PDFClass
{
    /// <summary>
    /// Looks up embedded file attachments in an existing PDF using PDFBox (via IKVM)
    /// and re-embeds them into the in-memory document.
    /// </summary>
    public class Class1
    {
        /// <summary>Name-tree key of the attachment that is looked up in each node.</summary>
        private const string AttachmentKey = "ZUGFERD_XML_FILENAME";

        public Class1 ()
        {
        }

        /// <summary>
        /// Loads the PDF at <paramref name="existingFileNameFullPath"/>, finds the embedded
        /// file registered under <see cref="AttachmentKey"/> and re-embeds its bytes as a
        /// new file specification in the loaded document.
        /// </summary>
        /// <param name="existingFileNameFullPath">Full path of the PDF file to load.</param>
        /// <remarks>
        /// The document is only modified in memory; this method never saves it.
        /// The document is always closed before the method returns.
        /// </remarks>
        public void ReadPDFAttachments (string existingFileNameFullPath)
        {
            PDDocument pdfDocument = PDDocument.load(existingFileNameFullPath);
            try
            {
                // Read the names dictionary that already exists in the catalog.
                // NOTE(review): constructing "new PDDocumentNameDictionary(catalog)" appears to
                // start from an empty dictionary in PDFBox 1.7.0, which would explain why
                // getEmbeddedFiles() always returned null - confirm against the PDFBox sources.
                PDDocumentNameDictionary namesDictionary = pdfDocument.getDocumentCatalog().getNames();
                if (namesDictionary == null)
                {
                    return; // document has no /Names entry, hence no attachments
                }

                PDEmbeddedFilesNameTreeNode embeddedFiles = namesDictionary.getEmbeddedFiles();
                if (embeddedFiles == null)
                {
                    return; // no embedded-files name tree
                }

                // Collect the nodes to inspect. A name tree may keep its entries directly in
                // the root (no kids); in that case the root itself is the only node to search.
                List<PDNameTreeNode> nodes = new List<PDNameTreeNode>();
                java.util.List kidList = embeddedFiles.getKids();
                if (kidList == null)
                {
                    nodes.Add(embeddedFiles);
                }
                else
                {
                    foreach (object oKid in kidList.toArray())
                    {
                        PDNameTreeNode kid = oKid as PDNameTreeNode;
                        if (kid != null)
                        {
                            nodes.Add(kid);
                        }
                    }
                }

                Map efMap = new HashMap();
                foreach (PDNameTreeNode node in nodes)
                {
                    // Skip nodes that do not contain the attachment instead of dereferencing null.
                    PDComplexFileSpecification spec = node.getValue(AttachmentKey) as PDComplexFileSpecification;
                    if (spec == null)
                    {
                        continue;
                    }

                    PDEmbeddedFile file = spec.getEmbeddedFile();
                    if (file == null)
                    {
                        continue;
                    }

                    // Copy the attachment bytes into a fresh embedded file for re-embedding.
                    byte[] data = file.getByteArray();
                    PDEmbeddedFile ef = new PDEmbeddedFile(pdfDocument, new ByteArrayInputStream(data));

                    PDComplexFileSpecification fs = new PDComplexFileSpecification();
                    fs.setEmbeddedFile(ef);
                    fs.getCOSDictionary().setString("Desc", node.toString()); // adds a description to attachment in PDF attachment list

                    efMap.put(node.toString(), fs);
                    embeddedFiles.setNames(efMap);

                    // Bug in PDFBox code requires we do this, or attachment will not embed.
                    // (The original referenced an unassigned local here; the tree being
                    // modified is embeddedFiles.)
                    ((COSDictionary)embeddedFiles.getCOSObject()).removeItem(COSName.LIMITS);

                    // Re-use the existing names dictionary so that the catalog's other
                    // name trees are not replaced by an empty dictionary.
                    namesDictionary.setEmbeddedFiles(embeddedFiles);
                    pdfDocument.getDocumentCatalog().setNames(namesDictionary);
                }
            }
            finally
            {
                // Release the resources held by the loaded document.
                pdfDocument.close();
            }
        }

    }
}

变量 embeddedFiles 始终为 null,即使我在代码中设置了断点,并且可以确认该 PDF 文件中确实包含附件。

非常感谢任何帮助!

0 个答案:

没有答案