我正在尝试使用C#.NET和PDFBox从现有PDF中提取嵌入式文件附件。 以下是我的代码:
using System.Collections.Generic;
using System.IO;
using java.util; // IKVM Java for Microsoft .NET http://www.ikvm.net
using java.io; // IKVM Java for Microsoft .NET http://www.ikvm.net
using org.apache.pdfbox.pdmodel; // PDFBox 1.7.0 http://pdfbox.apache.org
using org.apache.pdfbox.pdmodel.common; // PDFBox 1.7.0 http://pdfbox.apache.org
using org.apache.pdfbox.pdmodel.common.filespecification; // PDFBox 1.7.0 http://pdfbox.apache.org
using org.apache.pdfbox.cos; // PDFBox 1.7.0 http://pdfbox.apache.org
namespace PDFClass
{
public class Class1
{
public Class1 ()
{
}
public void ReadPDFAttachments (string existingFileNameFullPath)
{
PDEmbeddedFilesNameTreeNode efTree;
PDComplexFileSpecification fs;
FileStream stream;
ByteArrayInputStream fakeFile;
PDDocument pdfDocument = new PDDocument();
PDEmbeddedFile ef;
PDDocumentNameDictionary names;
Map efMap = new HashMap();
pdfDocument = PDDocument.load(existingFileNameFullPath);
PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary(pdfDocument.getDocumentCatalog());
PDEmbeddedFilesNameTreeNode embeddedFiles = namesDictionary.getEmbeddedFiles(); // some bug is currently preventing this call from working! >:[
if (embeddedFiles != null)
{
var aKids = embeddedFiles.getKids().toArray();
List<PDNameTreeNode> kids = new List<PDNameTreeNode>();
foreach (object oKid in aKids)
{
kids.Add(oKid as PDNameTreeNode);
}
if (kids != null)
{
foreach (PDNameTreeNode kid in kids)
{
PDComplexFileSpecification spec = (PDComplexFileSpecification)kid.getValue("ZUGFERD_XML_FILENAME");
PDEmbeddedFile file = spec.getEmbeddedFile();
fs = new PDComplexFileSpecification();
// Loop through each file for re-embedding
byte[] data = file.getByteArray();
int read = data.Length;
fakeFile = new ByteArrayInputStream(data);
ef = new PDEmbeddedFile(pdfDocument, fakeFile);
fs.setEmbeddedFile(ef);
efMap.put(kid.toString(), fs);
embeddedFiles.setNames(efMap);
names = new PDDocumentNameDictionary(pdfDocument.getDocumentCatalog());
((COSDictionary)efTree.getCOSObject()).removeItem(COSName.LIMITS); // Bug in PDFBox code requires we do this, or attachment will not embed. >:[
names.setEmbeddedFiles(embeddedFiles);
pdfDocument.getDocumentCatalog().setNames(names);
fs.getCOSDictionary().setString("Desc", kid.toString()); // adds a description to attachment in PDF attachment list
}
}
}
}
}
}
变量embeddedFiles始终为null。即使我在代码中放了一个中断,并且可以看到PDF文件中有明确的附件。
非常感谢任何帮助!