如何检查PDF页面是否包含图像

时间:2011-11-03 07:14:46

标签: c#-3.0 itextsharp

我想得到一个列表，列出PDF中哪些页面包含图像。我使用的是Docotic.Pdf库，目前的代码如下：

/// <summary>
/// Lets the user pick a folder, scans every "*.pdf" file directly inside it and adds one
/// row per file to <c>dgvFiles</c> with the file name, size, extension, the number of pages
/// that contain images, the number that do not, the total page count and the two page lists.
/// </summary>
/// <param name="sender">Control that raised the click event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btnBrowse_Click(object sender, EventArgs e)
    {
        if (fbdPath.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        txtFolderPath.Text = fbdPath.SelectedPath;
        string filepath = txtFolderPath.Text;
        label2.Text = "Please wait ....";
        // Pump pending window messages so the status text is painted before the scan starts.
        Application.DoEvents();

        // NOTE(review): `tb` is not declared in this method. Presumably it is a
        // DataGridViewComboBoxColumn field, or its declaration and the matching
        // dgvFiles.Columns.Add(tb) call were lost from this snippet - confirm.
        tb.Name = "Non-OCR PageNos";
        tb.HeaderText = "Non-OCR PageNos";
        // A new "OCR PageNos" column is appended on every click of the button.
        DataGridViewComboBoxColumn tb1 = new DataGridViewComboBoxColumn();
        dgvFiles.Columns.Add(tb1);
        tb1.Name = "OCR PageNos";
        tb1.HeaderText = "OCR PageNos";

        string[] files = Directory.GetFiles(filepath, "*.pdf");
        textBox1.Text = null;
        textBox2.Text = null;

        // The text boxes keep accumulating page numbers across all scanned files;
        // builders avoid re-reading and re-assigning the control text for every page.
        System.Text.StringBuilder pagesWithImages = new System.Text.StringBuilder();
        System.Text.StringBuilder pagesWithoutImages = new System.Text.StringBuilder();

        for (int k = 0; k < files.Length; k++)
        {
            FileInfo f = new FileInfo(files[k]);
            listBox1.Items.Add(f.Name);

            // Fresh lists for each file so every grid row gets its own page numbers
            // instead of one shared list that grows across files.
            List<int> listNonOcr = new List<int>();   // 1-based pages that contain images
            List<int> listOcr = new List<int>();      // 1-based pages without images
            int total;

            // `using` releases the document even when Open or the page scan throws.
            using (PdfDocument pdf = new PdfDocument())
            {
                pdf.Open(files[k]);
                total = pdf.PageCount;
                for (int i = 0; i < total; i++)
                {
                    int pageNo = i + 1;
                    if (pdf.Pages[i].ImageCount != 0)
                    {
                        listNonOcr.Add(pageNo);
                        pagesWithImages.Append(pageNo).Append(',');
                    }
                    else
                    {
                        listOcr.Add(pageNo);
                        pagesWithoutImages.Append(pageNo).Append(',');
                    }
                }
            }

            textBox1.Text = pagesWithImages.ToString();
            textBox2.Text = pagesWithoutImages.ToString();

            // Always refresh the counters, including for files with no images at all,
            // so a row never reports the previous file's numbers.
            label4.Text = listNonOcr.Count.ToString();
            label3.Text = listOcr.Count.ToString();

            // NOTE(review): the last two values are List<int> instances placed into
            // combo-box cells; verify the grid displays them as intended.
            dgvFiles.Rows.Add(f.Name, f.Length, f.Extension, label4.Text, label3.Text, total.ToString(), listNonOcr, listOcr);
        }

        // Set once after the loop so the status is also updated when the folder has no PDFs.
        label2.Text = "Ocr/Non-Ocr Search Completed";
    }

也非常欢迎使用iTextSharp的解决方案。

0 个答案:

没有答案