我试图在 C# 中使用 iTextSharp 读取马拉地语(Marathi)的 PDF,但无法正确获取数据。通过 C# 读取该马拉地语 PDF 时,显示出来的数据不正确。
/// <summary>
/// Extracts the text of every page of a PDF, copies the recognised fields of each
/// extracted line into a worksheet that is saved to <c>F:/finalexel.xls</c>, and shows
/// the extracted text in <c>textBox1</c>.
/// </summary>
/// <param name="fileName">Path of the PDF file to read.</param>
/// <returns>
/// The concatenated text of all pages, trimmed; an empty string when
/// <paramref name="fileName"/> does not exist.
/// </returns>
public string ReadPdfFile(string fileName)
{
    StringBuilder text = new StringBuilder();
    if (!System.IO.File.Exists(fileName))
    {
        return text.ToString().Trim();
    }

    PdfReader pdfReader = new PdfReader(fileName);
    try
    {
        Workbook workbook = new Workbook();
        Worksheet sheet = workbook.Worksheets[0];

        // Header row.
        sheet.Range["A1"].Text = "ओळखपत्र क्रमांक";
        sheet.Range["B1"].Text = "मतदाराचे पुर्ण नाव";
        sheet.Range["C1"].Text = "वडिलांचे नाव";
        sheet.Range["D1"].Text = "घर क्रमांक";
        sheet.Range["E1"].Text = "वय";
        sheet.Range["F1"].Text = "लिंग ";

        // Next free row per column. These live outside the page loop so that the rows
        // written for one page are not overwritten by the following page.
        int idRow = 2;
        int nameRow = 2;
        int fatherRow = 2;
        int houseRow = 2;
        int ageRow = 2; // shared by the age (E) and gender (F) columns

        for (int page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            // A fresh strategy per page, as in the original code.
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

            // The extractor already returns a .NET string. Pushing it through
            // Encoding.Convert(Unicode -> Unicode) over its UTF-8 bytes cannot repair
            // anything and damages the last character when the byte count is odd,
            // so no re-encoding is done here.
            string currentText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);

            foreach (string line in currentText.Split('\n'))
            {
                if (!line.Contains(":"))
                {
                    // Lines without a "label: value" separator go to column A,
                    // except those containing the label fragment below.
                    if (!line.Contains("नाळ"))
                    {
                        sheet.Range["a" + idRow].Text = line;
                        idRow++;
                    }
                    continue;
                }

                // Split(':') on a line containing ':' always yields at least two parts.
                string[] fields = line.Split(':');
                string label = fields[0];
                if (fields[1].Length <= 1)
                {
                    continue; // no usable value after the label
                }

                // NOTE(review): the label literals below are kept exactly as they were;
                // they appear to mirror how the extractor renders the Marathi labels
                // rather than the correctly spelled words - confirm before changing.
                if (label.Contains("मतदार"))
                {
                    sheet.Range["B" + nameRow].Text = fields[1];
                    nameRow++;
                }
                if (label.Contains("ळडडऱांचे नाळ"))
                {
                    sheet.Range["C" + fatherRow].Text = fields[1];
                    fatherRow++;
                }
                if (label.Contains("घर क्रमांक"))
                {
                    sheet.Range["D" + houseRow].Text = fields[1];
                    houseRow++;
                }
                if (label.Contains("ळय"))
                {
                    // The age is taken from the third space-separated token of the value
                    // and the gender from the text after the second ':'. Either may be
                    // missing on a malformed line, so each is written only when present
                    // instead of throwing IndexOutOfRangeException.
                    string[] ageTokens = fields[1].Split(' ');
                    if (ageTokens.Length > 2)
                    {
                        sheet.Range["E" + ageRow].Text = ageTokens[2];
                    }
                    if (fields.Length > 2)
                    {
                        sheet.Range["F" + ageRow].Text = fields[2];
                    }
                    ageRow++;
                }
            }

            text.Append(currentText);
        }

        // Save once, after all pages have been processed.
        workbook.SaveToFile("F:/finalexel.xls");
        textBox1.Text = text.ToString();
    }
    finally
    {
        // Release the PDF even when extraction or saving throws.
        pdfReader.Close();
    }

    return text.ToString().Trim();
}