我想获取一个 PDF 文件中哪些页面包含图像的页码列表。我使用的是 Docotic.Pdf 库,
目前使用的代码如下:
/// <summary>
/// Lets the user pick a folder, scans every "*.pdf" file in it and adds one grid row
/// per file listing which pages contain images and which do not.
/// </summary>
/// <remarks>
/// Following the naming already used by this form, pages with at least one image are
/// reported as "Non-OCR" pages and pages without images as "OCR" pages.
/// The text boxes accumulate the page numbers of all scanned files, as before.
/// </remarks>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (not used).</param>
private void btnBrowse_Click(object sender, EventArgs e)
{
    // Positions of the two page-number cells; these match the 7th and 8th values
    // that were previously passed positionally to Rows.Add.
    const int NonOcrPagesCellIndex = 6;
    const int OcrPagesCellIndex = 7;

    if (fbdPath.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    txtFolderPath.Text = fbdPath.SelectedPath;
    string filepath = txtFolderPath.Text;

    label2.Text = "Please wait ....";
    Application.DoEvents(); // let the status label repaint before the blocking scan starts

    // NOTE(review): tb is not declared inside this handler; presumably it is a
    // DataGridViewComboBoxColumn field that already belongs to dgvFiles - confirm.
    tb.Name = "Non-OCR PageNos";
    tb.HeaderText = "Non-OCR PageNos";

    // Add the "OCR PageNos" column only once; previously every click appended another copy.
    if (!dgvFiles.Columns.Contains("OCR PageNos"))
    {
        DataGridViewComboBoxColumn tb1 = new DataGridViewComboBoxColumn();
        dgvFiles.Columns.Add(tb1);
        tb1.Name = "OCR PageNos";
        tb1.HeaderText = "OCR PageNos";
    }

    string[] files = Directory.GetFiles(filepath, "*.pdf");
    textBox1.Text = null;
    textBox2.Text = null;

    foreach (string file in files)
    {
        FileInfo f = new FileInfo(file);
        listBox1.Items.Add(f.Name);

        // Fresh collections per file: sharing one list across files made every row
        // show the accumulated page numbers of all files scanned so far.
        List<int> listNonOcr = new List<int>();
        List<int> listOcr = new List<int>();
        System.Text.StringBuilder imagePagesText = new System.Text.StringBuilder();
        System.Text.StringBuilder plainPagesText = new System.Text.StringBuilder();
        int total;

        // using guarantees the document is released even if opening or reading a page throws.
        using (PdfDocument pdf = new PdfDocument())
        {
            pdf.Open(file);
            total = pdf.PageCount;

            for (int i = 0; i < total; i++)
            {
                int pageNumber = i + 1; // page numbers shown to the user are 1-based

                if (pdf.Pages[i].ImageCount != 0)
                {
                    listNonOcr.Add(pageNumber);
                    imagePagesText.Append(pageNumber).Append(',');
                }
                else
                {
                    listOcr.Add(pageNumber);
                    plainPagesText.Append(pageNumber).Append(',');
                }
            }
        }

        textBox1.Text += imagePagesText.ToString();
        textBox2.Text += plainPagesText.ToString();

        // Always refresh both counters; previously a PDF without any image kept the
        // counts of the preceding file and reported them in its own row.
        label4.Text = listNonOcr.Count.ToString();
        label3.Text = (total - listNonOcr.Count).ToString();

        int rowIndex = dgvFiles.Rows.Add(f.Name, f.Length, f.Extension, label4.Text, label3.Text, total.ToString());
        DataGridViewRow row = dgvFiles.Rows[rowIndex];
        SetPageNumbersCell(row, NonOcrPagesCellIndex, listNonOcr);
        SetPageNumbersCell(row, OcrPagesCellIndex, listOcr);
    }

    // Set once after the scan so the status is also updated when the folder has no PDFs.
    label2.Text = "Ocr/Non-Ocr Search Completed";
}

/// <summary>
/// Shows a list of page numbers in one cell of a grid row.
/// </summary>
/// <remarks>
/// A combo box cell only accepts values that are present in its own Items collection,
/// so the page numbers are added as items instead of assigning the list as the value.
/// Any other cell type receives the numbers as comma-separated text.
/// </remarks>
/// <param name="row">The row that owns the cell.</param>
/// <param name="cellIndex">Zero-based index of the cell; ignored when the row has no such cell.</param>
/// <param name="pageNumbers">The 1-based page numbers to show.</param>
private static void SetPageNumbersCell(DataGridViewRow row, int cellIndex, List<int> pageNumbers)
{
    if (cellIndex >= row.Cells.Count)
    {
        return;
    }

    DataGridViewComboBoxCell comboCell = row.Cells[cellIndex] as DataGridViewComboBoxCell;
    if (comboCell == null)
    {
        row.Cells[cellIndex].Value = string.Join(",", pageNumbers);
        return;
    }

    foreach (int pageNumber in pageNumbers)
    {
        comboCell.Items.Add(pageNumber);
    }

    // Preselect the first page so the cell is not blank when there is something to show.
    if (pageNumbers.Count > 0)
    {
        comboCell.Value = pageNumbers[0];
    }
}
如果有使用 iTextSharp 的解决方案,也非常欢迎。