iTextSharp大部分时间都能正常工作,并且大多数错误都能在try/catch块中被捕获。但是,它偶尔会遇到一个无法处理的文件,导致整个应用程序崩溃。
当我尝试在Windows中直接打开该PDF文件时,会收到如下消息:
"无法打开此文件。文件格式存在问题"
很明显文件已损坏。
有没有办法捕获此错误?
/// <summary>
/// Extracts the text of every page of a PDF file and computes simple
/// word and character counts over the extracted text.
/// </summary>
/// <param name="ScanId">Identifier recorded in the error log entry if extraction fails.</param>
/// <param name="pdfPath">Path of the PDF file to read.</param>
/// <param name="NumWords">Receives the number of non-empty tokens found in the text; 0 if extraction fails.</param>
/// <param name="NumCharacters">Receives the number of letter-or-digit characters in the text; 0 if extraction fails.</param>
/// <returns>The concatenated text of all pages, or an empty string if an exception was caught.</returns>
public string GetPDFText(int ScanId, String pdfPath, out int NumWords, out int NumCharacters)
{
    string Content = "";
    NumWords = 0;
    NumCharacters = 0;
    try
    {
        using (PdfReader reader = new PdfReader(pdfPath))
        {
            StringBuilder text = new StringBuilder();
            for (int i = 1; i <= reader.NumberOfPages; i++)
            {
                // A fresh strategy per page so text from one page does not leak into the next.
                ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                // The extracted text is already a .NET (UTF-16) string. Round-tripping it through
                // Encoding.Default cannot repair anything; it only replaces characters outside the
                // ANSI code page with '?', so the text is appended as-is.
                text.Append(PdfTextExtractor.GetTextFromPage(reader, i, strategy));
            }
            Content = text.ToString();

            // '-' is placed last in the class so it is a literal. Written as "\.-:" it formed a
            // range from '.' to ':' that also covered '/' and the digits 0-9, which made numeric
            // tokens count as separators instead of words.
            var regex = new Regex(@"\b[\s,.:;-]*");
            NumWords = regex.Split(Content).Count(x => !string.IsNullOrEmpty(x));
            NumCharacters = Content.Count(char.IsLetterOrDigit);
        }
    }
    catch (Exception ex)
    {
        // One handler covers DocumentException, InvalidPdfException and everything else,
        // since all three were logged identically.
        // NOTE(review): a try/catch cannot intercept failures that terminate the process
        // (e.g. a stack overflow inside the parser); if a corrupt file still brings the
        // application down, confirm the failure type from the crash dump or event log.

        // TargetSite can be null; dereferencing it unguarded would throw from inside this
        // handler and escape the method.
        MethodBase targetSite = ex.TargetSite;
        string targetSiteText = targetSite != null ? targetSite.ToString() : "";

        AddErrorLog(new ErrorLog(-1, ex.GetType().Name, MethodBase.GetCurrentMethod().Name, ScanId, pdfPath, ex.Message, targetSiteText, DateTime.Now, Environment.UserName, false));
        NumFileErrors++;
    }
    return Content;
}