pdfbox问题
我使用pdfbox将文本从PDF提取到我的richtextbox。
我不知道问题出在哪里:有些PDF可以正常读取,但有些PDF会抛出异常,异常信息是:
对象引用未设置为对象的实例。
这是我的代码:
using org.pdfbox.pdmodel;
using org.pdfbox.util;
/// <summary>
/// Lets the user pick a PDF file, extracts its text with PDFBox and shows the
/// text in <c>richtextBox1</c> and the file name in <c>textBox1</c>.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
private void pdfButton_Click(object sender, EventArgs e)
{
    // Dispose the dialog when the handler ends instead of waiting for finalization.
    using (OpenFileDialog openFD = new OpenFileDialog())
    {
        openFD.FileName = "";
        openFD.InitialDirectory = "C:\\";
        openFD.Filter = "All PDF Files|*.PDF";
        openFD.Title = "Browse all PDF files";

        if (openFD.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        try
        {
            PDDocument pdfFile = null;
            try
            {
                // The property is "FileName"; C# is case-sensitive, so "Filename" does not compile.
                pdf_filename = Path.GetFileNameWithoutExtension(openFD.FileName);
                pdfFile = PDDocument.load(openFD.FileName);
                PDFTextStripper pdfStripper = new PDFTextStripper();
                richtextBox1.Text = pdfStripper.getText(pdfFile);
                textBox1.Text = Path.GetFileName(openFD.FileName);
            }
            finally
            {
                // Always release the loaded document, even when extraction fails.
                // The inner try keeps a failure in close() inside the outer catch.
                if (pdfFile != null)
                {
                    pdfFile.close();
                }
            }
        }
        catch (Exception error)
        {
            // Show the full exception (including stack trace) to the user.
            MessageBox.Show(error.ToString());
        }
    }
}
答案 0 :(得分:0)
我使用iTextSharp解决了这个问题。这是我的同事建议的,我用iTextSharp替换了PDFBox。
如果有人遇到与我相同的问题,下面是可以正常工作的代码:
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
/// <summary>
/// Lets the user pick a PDF file, extracts its text through <c>ReadPdf</c> and
/// shows the text in <c>richtextBox1</c> and the file name in <c>textBox1</c>.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
private void pdfButton_Click(object sender, EventArgs e)
{
    // Dispose the dialog when the handler ends instead of waiting for finalization.
    using (OpenFileDialog openFD = new OpenFileDialog())
    {
        openFD.FileName = "";
        openFD.InitialDirectory = "C:\\";
        openFD.Filter = "All PDF Files|*.PDF";
        openFD.Title = "Browse all PDF files";

        if (openFD.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        try
        {
            // The property is "FileName"; C# is case-sensitive, so "Filename" does not compile.
            string selectedPath = openFD.FileName;
            pdf_filename = Path.GetFileNameWithoutExtension(selectedPath);
            richtextBox1.Text = ReadPdf(selectedPath);
            textBox1.Text = Path.GetFileName(selectedPath);
        }
        catch (Exception error)
        {
            // Show the full exception (including stack trace) to the user.
            MessageBox.Show(error.ToString());
        }
    }
}
/// <summary>
/// Extracts the text of every page of a PDF file using iTextSharp.
/// </summary>
/// <param name="filename">Path of the PDF file to read.</param>
/// <returns>
/// The concatenated text of all pages, in page order, or an empty string
/// when the file does not exist.
/// </returns>
private string ReadPdf(string filename)
{
    if (!File.Exists(filename))
    {
        return string.Empty;
    }

    PdfReader reader = new PdfReader(filename);
    try
    {
        // StringBuilder avoids re-copying the accumulated text for each page.
        System.Text.StringBuilder text = new System.Text.StringBuilder();

        // The loop runs from 1 through NumberOfPages inclusive.
        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            text.Append(PdfTextExtractor.GetTextFromPage(reader, page));
        }

        return text.ToString();
    }
    finally
    {
        // Close the reader even when extraction throws, so the file is released.
        reader.Close();
    }
}