只要PDF文件中不包含图像,我就能准确地读取英文PDF中的所有文本。我使用了 iTextSharp DLL 来实现这一功能,PdfReader 也能准确地统计出PDF的总页数。对英文文本的提取结果很好,但我无法从PDF中读取孟加拉文。有人可以帮助我吗?
/// <summary>
/// Extracts the text of every page of a PDF file with iTextSharp and shows the
/// concatenated result in <c>richTextBox1</c>.
/// </summary>
/// <param name="fileName">Path of the PDF file to read.</param>
/// <remarks>
/// Any exception raised while opening or parsing the document is reported through a
/// message box; whatever text was extracted before the failure is still displayed.
/// NOTE(review): whether non-Latin scripts (e.g. Bengali) come out as readable Unicode
/// presumably depends on the fonts/encodings embedded in the PDF - confirm against the
/// actual documents.
/// </remarks>
public void ReadPdfFile(string fileName)
{
    StringBuilder text = new StringBuilder();
    PdfReader pdfReader = null;
    try
    {
        // Open the document exactly once. The previous version created a second,
        // unused reader that was never closed. A path that cannot be opened makes
        // the constructor throw, which is reported by the catch block below.
        pdfReader = new PdfReader(fileName);

        // iTextSharp page numbers are 1-based.
        for (int page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            // A fresh strategy per page so text from earlier pages is not carried over.
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
            string currentText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);
            text.Append(currentText);
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
    finally
    {
        // Release the reader even when extraction fails part-way through.
        if (pdfReader != null)
        {
            pdfReader.Close();
        }
    }

    richTextBox1.Text = text.ToString();
}