我正在使用 iTextSharp 从 PDF 文档中提取文本，但是部分采用 ISO-8859-1 编码的文本文件无法正确显示。
以下是我的代码，如果有人可以帮助我，我将不胜感激。
/// <summary>
/// Extracts the text of every page of a PDF file and returns it as one string.
/// </summary>
/// <param name="fileName">Path of the PDF file to read.</param>
/// <returns>
/// The concatenated text of all pages, or an empty string when the file does
/// not exist or when any exception is raised while reading it.
/// </returns>
public string ReadPdfFile(string fileName)
{
    // Nothing to read: keep the historical result (empty text) for a missing file.
    if (!File.Exists(fileName))
    {
        return string.Empty;
    }

    PdfReader pdfReader = null;
    try
    {
        pdfReader = new PdfReader(fileName);
        StringBuilder text = new StringBuilder();

        for (int page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            // GetTextFromPage already returns a decoded .NET string. The previous
            // round trip (Encoding.Default bytes -> read as UTF-8 -> converted to
            // ISO-8859-2 -> decoded again) reinterpreted those characters with
            // mismatched encodings and corrupted every non-ASCII character, so the
            // extracted text is appended untouched.
            string currentText = PdfTextExtractor.GetTextFromPage(
                pdfReader, page, new LocationTextExtractionStrategy());
            text.Append(currentText);
        }

        return text.ToString();
    }
    catch
    {
        // Best-effort contract kept for existing callers: any failure while
        // opening or parsing the PDF yields an empty string instead of throwing.
        return string.Empty;
    }
    finally
    {
        // Single place where the reader is released, on success and on failure.
        if (pdfReader != null)
        {
            pdfReader.Close();
        }
    }
}