从安全的PDF中读取附件

时间:2016-07-21 13:58:10

标签: pdf c#-4.0 itext attachment 3d-secure

我正在处理一个受密码保护(加密)的PDF文件,该PDF文件中附有一个Excel文件。

以下是我试过的代码。

    /// <summary>
    /// Program entry point: creates a <c>Program</c> instance and runs
    /// <c>EmbedAttachments</c> on it.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        // The extraction step is deliberately left disabled, as in the original post:
        //   pgm.ExtractAttachments(pgm.pdfFile);
        new Program().EmbedAttachments();
    }

    /// <summary>
    /// Opens the password-protected PDF at <paramref name="_pdfFile"/> and writes every
    /// document-level attachment (catalog /Names -> /EmbeddedFiles -> /Names) into the
    /// directory named by <c>attExtPath</c>, creating that directory if necessary.
    /// </summary>
    /// <param name="_pdfFile">Path of the PDF file to read.</param>
    /// <remarks>
    /// Only the document-level embedded-files name tree is inspected; attachments stored as
    /// page-level file-attachment annotations are not found by this method.
    /// Any exception is written to the console, after which the method waits for a key press;
    /// nothing is propagated to the caller.
    /// </remarks>
    private void ExtractAttachments(string _pdfFile)
    {
        PdfReader reader = null;
        try
        {
            if (!Directory.Exists(attExtPath))
                Directory.CreateDirectory(attExtPath);

            byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("TFAER13052016");
            reader = new PdfReader(_pdfFile, password);

            PdfDictionary catalog = reader.Catalog;

            PdfDictionary documentNames = PdfReader.GetPdfObject(catalog.Get(PdfName.NAMES)) as PdfDictionary;
            if (documentNames == null)
                throw new InvalidOperationException("Unable to Read the document");

            PdfDictionary embeddedFiles = PdfReader.GetPdfObject(documentNames.Get(PdfName.EMBEDDEDFILES)) as PdfDictionary;
            if (embeddedFiles == null)
                throw new InvalidOperationException("Unable to Read the attachment or There may be no Attachment");

            PdfArray filespecs = embeddedFiles.GetAsArray(PdfName.NAMES);
            if (filespecs == null)
                throw new InvalidOperationException("The embedded-files name tree has no direct /Names array (a /Kids tree is not supported)");

            // The /Names array alternates [name, filespec, name, filespec, ...], so the
            // file specifications sit at the odd indices. Stepping by two from index 1
            // also avoids reading past the end when the array length is odd.
            for (int i = 1; i < filespecs.Size; i += 2)
            {
                PdfDictionary fileSpec = filespecs.GetAsDict(i);
                if (fileSpec == null)
                    continue;

                PdfDictionary embedded = fileSpec.GetAsDict(PdfName.EF);
                if (embedded == null)
                    continue;

                foreach (PdfName key in embedded.Keys)
                {
                    PRStream stream = PdfReader.GetPdfObject(embedded.GetAsIndirectObject(key)) as PRStream;
                    PdfString nameEntry = fileSpec.GetAsString(key);
                    if (stream == null || nameEntry == null)
                        continue;

                    // The name comes from inside the PDF: keep only the last path segment
                    // so it cannot point outside the extraction directory.
                    string attachedFileName = nameEntry.ToString();
                    int lastSeparator = attachedFileName.LastIndexOfAny(new[] { '\\', '/' });
                    if (lastSeparator >= 0)
                        attachedFileName = attachedFileName.Substring(lastSeparator + 1);
                    if (attachedFileName.Length == 0)
                        continue;

                    byte[] attachedFileBytes = PdfReader.GetStreamBytes(stream);
                    System.IO.File.WriteAllBytes(System.IO.Path.Combine(attExtPath, attachedFileName), attachedFileBytes);
                }
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.ToString());
            Console.ReadKey();
        }
        finally
        {
            // Release the reader's underlying file handle whether or not extraction succeeded.
            if (reader != null)
                reader.Close();
        }
    }

    /// <summary>
    /// Builds a test PDF at <c>pdfFile</c> (one LETTER page with the text "This is test"
    /// and the file C:\PDFReader\1.xls embedded as the document-level attachment "11.xls"),
    /// then writes an encrypted copy of it to <c>epdfFile</c>.
    /// </summary>
    /// <remarks>
    /// An existing file at <c>pdfFile</c> is deleted first. Any exception is written to the
    /// console, after which the method waits for a key press; nothing is propagated.
    /// </remarks>
    private void EmbedAttachments()
    {
        try
        {
            if (File.Exists(pdfFile))
                File.Delete(pdfFile);

            // Own the output stream here so it is released even if PDF generation throws
            // before the document is closed.
            using (FileStream pdfStream = new FileStream(pdfFile, FileMode.Create))
            {
                Document document = new Document(PageSize.LETTER);
                PdfWriter writer = PdfWriter.GetInstance(document, pdfStream);

                document.Open();
                document.NewPage();
                document.Add(new Paragraph("This is test"));

                PdfFileSpecification pfs = PdfFileSpecification.FileEmbedded(writer, @"C:\PDFReader\1.xls", "11.xls", null);
                writer.AddFileAttachment(pfs);

                document.Close();
            }

            // Read the paths from this instance, as the deletion above already does,
            // instead of creating a second Program object just to fetch them.
            using (Stream input = new FileStream(pdfFile, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (Stream output = new FileStream(epdfFile, FileMode.Create, FileAccess.Write, FileShare.None))
            {
                PdfReader reader = new PdfReader(input);
                try
                {
                    // Argument order: strength flag, user password, owner password, permissions.
                    PdfEncryptor.Encrypt(reader, output, true, "Password", "secret", PdfWriter.ALLOW_SCREENREADERS);
                }
                finally
                {
                    reader.Close();
                }
            }
        }
        catch (Exception ex)
        {
            // Print type, message and stack trace; the stack trace alone hides the cause.
            Console.WriteLine(ex.ToString());
            Console.ReadKey();
        }
    }
}

上述代码包含两部分:创建一个带有Excel附件的加密PDF,以及从该PDF中提取这个附件。

现在真正的问题出在我手头那个作为需求文档提供的文件上(我无法共享该文件),它和我的示例一样,也带有一个Excel附件。

上述代码对我自己创建的加密PDF有效,但对实际的加密PDF无效。

调试时,我发现问题出在以下这行代码:

documentNames = (PdfDictionary)PdfReader.GetPdfObject(catalog.Get(PdfName.NAMES));

其中,

catalog.Get(PdfName.NAMES)

对实际文件返回NULL,而对我自己创建的文件则能得到预期的输出。

请就上述问题给我一些指导。

TIA。

1 个答案:

答案 0 :(得分:0)

正如mkl所指出的,该文件是以注释(annotation)形式附加的附件。但他提供的示例中所引用的 ZipFile 的相关方法已不再受支持,因此我找到了下面这段替代代码。

/// <summary>
/// Extracts every attachment stored as a page-level file-attachment annotation from the
/// password-protected PDF held in <paramref name="src"/> and writes each one into
/// C:\PDFReader\Extract\, creating that directory if necessary.
/// </summary>
/// <param name="src">Raw bytes of the PDF document.</param>
/// <exception cref="ArgumentNullException"><paramref name="src"/> is null.</exception>
public void ExtractAttachments(byte[] src)
    {
        if (src == null)
            throw new ArgumentNullException("src");

        string attExtPath = @"C:\PDFReader\Extract\";

        if (!Directory.Exists(attExtPath))
            Directory.CreateDirectory(attExtPath);

        byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("TFAER13052016");
        PdfReader reader = new PdfReader(src, password);
        try
        {
            for (int i = 1; i <= reader.NumberOfPages; i++)
            {
                PdfArray array = reader.GetPageN(i).GetAsArray(PdfName.ANNOTS);
                if (array == null) continue;

                for (int j = 0; j < array.Size; j++)
                {
                    PdfDictionary annot = array.GetAsDict(j);
                    if (annot == null || !PdfName.FILEATTACHMENT.Equals(annot.GetAsName(PdfName.SUBTYPE)))
                        continue;

                    // Skip annotations with no file specification or no embedded-file
                    // dictionary instead of failing on a null reference.
                    PdfDictionary fs = annot.GetAsDict(PdfName.FS);
                    if (fs == null) continue;
                    PdfDictionary refs = fs.GetAsDict(PdfName.EF);
                    if (refs == null) continue;

                    foreach (PdfName name in refs.Keys)
                    {
                        PRStream stream = PdfReader.GetPdfObject(refs.GetAsIndirectObject(name)) as PRStream;
                        PdfString nameEntry = fs.GetAsString(name);
                        if (stream == null || nameEntry == null)
                            continue;

                        // The stored name may be a full path using either separator; keep
                        // only the last segment so the file lands inside attExtPath.
                        string attachedFileName = nameEntry.ToString();
                        int lastSeparator = attachedFileName.LastIndexOfAny(new[] { '\\', '/' });
                        if (lastSeparator >= 0)
                            attachedFileName = attachedFileName.Substring(lastSeparator + 1);
                        if (attachedFileName.Length == 0)
                            continue;

                        byte[] attachedFileBytes = PdfReader.GetStreamBytes(stream);

                        System.IO.File.WriteAllBytes(System.IO.Path.Combine(attExtPath, attachedFileName), attachedFileBytes);
                    }
                }
            }
        }
        finally
        {
            // Release the reader whether or not extraction succeeded.
            reader.Close();
        }
    }

如果还有其他实现方式,请告诉我。

感谢!!!